mirror of
https://github.com/grafana/grafana.git
synced 2025-12-22 12:44:34 +08:00
Compare commits
10 Commits
zoltan/pos
...
titolins/a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b3efe99482 | ||
|
|
d72c046df2 | ||
|
|
1229a15cfd | ||
|
|
c833faf7ec | ||
|
|
ea7e2d7e7d | ||
|
|
ebec073d3c | ||
|
|
810da084b9 | ||
|
|
4808803e0d | ||
|
|
b5e11cf8e9 | ||
|
|
a551bb7f61 |
4
go.mod
4
go.mod
@@ -87,7 +87,7 @@ require (
|
||||
github.com/googleapis/gax-go/v2 v2.15.0 // @grafana/grafana-backend-group
|
||||
github.com/gorilla/mux v1.8.1 // @grafana/grafana-backend-group
|
||||
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // @grafana/grafana-app-platform-squad
|
||||
github.com/grafana/alerting v0.0.0-20251119204204-77fa75125181 // @grafana/alerting-backend
|
||||
github.com/grafana/alerting v0.0.0-20251128214501-29edef10542b // @grafana/alerting-backend
|
||||
github.com/grafana/authlib v0.0.0-20250930082137-a40e2c2b094f // @grafana/identity-access-team
|
||||
github.com/grafana/authlib/types v0.0.0-20251119142549-be091cf2f4d4 // @grafana/identity-access-team
|
||||
github.com/grafana/dataplane/examples v0.0.1 // @grafana/observability-metrics
|
||||
@@ -697,7 +697,7 @@ require (
|
||||
replace github.com/crewjam/saml => github.com/grafana/saml v0.4.15-0.20240917091248-ae3bbdad8a56
|
||||
|
||||
// Use our fork of the upstream Alertmanager.
|
||||
replace github.com/prometheus/alertmanager => github.com/grafana/prometheus-alertmanager v0.25.1-0.20250911094103-5456b6e45604
|
||||
replace github.com/prometheus/alertmanager => github.com/grafana/prometheus-alertmanager v0.25.1-0.20251128214251-5dfe7baf90af
|
||||
|
||||
exclude github.com/mattn/go-sqlite3 v2.0.3+incompatible
|
||||
|
||||
|
||||
10
go.sum
10
go.sum
@@ -1611,8 +1611,10 @@ github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7Fsg
|
||||
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
|
||||
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
|
||||
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
|
||||
github.com/grafana/alerting v0.0.0-20251119204204-77fa75125181 h1:nbxKRtrbuhvOYmI2RhOYauHRJCtpR+vTNIgg1lFUCws=
|
||||
github.com/grafana/alerting v0.0.0-20251119204204-77fa75125181/go.mod h1:VtPNIFlEOJPPEc13Ax6ZTbNV3M/sAzLID72YjgzOPVA=
|
||||
github.com/grafana/alerting v0.0.0-20251119223942-7e109cc4f3e9 h1:qHY1ibKD/S5CyYi1VtUGKSq5kbgA7dTv/OYtlEMklqg=
|
||||
github.com/grafana/alerting v0.0.0-20251119223942-7e109cc4f3e9/go.mod h1:VtPNIFlEOJPPEc13Ax6ZTbNV3M/sAzLID72YjgzOPVA=
|
||||
github.com/grafana/alerting v0.0.0-20251128214501-29edef10542b h1:QI2xO0UVmGtkv6CuQSCaoW0uw3SV7MDsC/Mwehl75II=
|
||||
github.com/grafana/alerting v0.0.0-20251128214501-29edef10542b/go.mod h1:QAWfUswv2DaC5dD45o3vXia/QDvLi17jT4u4UToeF7A=
|
||||
github.com/grafana/authlib v0.0.0-20250930082137-a40e2c2b094f h1:Cbm6OKkOcJ+7CSZsGsEJzktC/SIa5bxVeYKQLuYK86o=
|
||||
github.com/grafana/authlib v0.0.0-20250930082137-a40e2c2b094f/go.mod h1:axY0cdOg3q0TZHwpHnIz5x16xZ8ZBxJHShsSHHXcHQg=
|
||||
github.com/grafana/authlib/types v0.0.0-20251119142549-be091cf2f4d4 h1:Muoy+FMGrHj3GdFbvsMzUT7eusgii9PKf9L1ZaXDDbY=
|
||||
@@ -1665,8 +1667,8 @@ github.com/grafana/nanogit v0.3.0 h1:XNEef+4Vi+465ZITJs/g/xgnDRJbWhhJ7iQrAnWZ0oQ
|
||||
github.com/grafana/nanogit v0.3.0/go.mod h1:6s6CCTpyMOHPpcUZaLGI+rgBEKdmxVbhqSGgCK13j7Y=
|
||||
github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8=
|
||||
github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls=
|
||||
github.com/grafana/prometheus-alertmanager v0.25.1-0.20250911094103-5456b6e45604 h1:aXfUhVN/Ewfpbko2CCtL65cIiGgwStOo4lWH2b6gw2U=
|
||||
github.com/grafana/prometheus-alertmanager v0.25.1-0.20250911094103-5456b6e45604/go.mod h1:O/QP1BCm0HHIzbKvgMzqb5sSyH88rzkFk84F4TfJjBU=
|
||||
github.com/grafana/prometheus-alertmanager v0.25.1-0.20251128214251-5dfe7baf90af h1:X5p+fjoW+062S7GmTIyG3c7GcSVhCPo+/2+5Hgah6AQ=
|
||||
github.com/grafana/prometheus-alertmanager v0.25.1-0.20251128214251-5dfe7baf90af/go.mod h1:O/QP1BCm0HHIzbKvgMzqb5sSyH88rzkFk84F4TfJjBU=
|
||||
github.com/grafana/pyroscope-go/godeltaprof v0.1.9 h1:c1Us8i6eSmkW+Ez05d3co8kasnuOY813tbMN8i/a3Og=
|
||||
github.com/grafana/pyroscope-go/godeltaprof v0.1.9/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU=
|
||||
github.com/grafana/pyroscope/api v1.2.1-0.20250415190842-3ff7247547ae h1:35W3Wjp9KWnSoV/DuymmyIj5aHE0CYlDQ5m2KeXUPAc=
|
||||
|
||||
@@ -1932,6 +1932,7 @@ golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210503080704-8803ae5d1324/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
|
||||
@@ -1201,4 +1201,8 @@ export interface FeatureToggles {
|
||||
* Adds support for Kubernetes alerting historian APIs
|
||||
*/
|
||||
kubernetesAlertingHistorian?: boolean;
|
||||
/**
|
||||
* Use synchronized dispatch timer to minimize duplicate notifications across alertmanager HA pods
|
||||
*/
|
||||
alertingSyncDispatchTimer?: boolean;
|
||||
}
|
||||
|
||||
@@ -1984,6 +1984,14 @@ var (
|
||||
Owner: grafanaAlertingSquad,
|
||||
RequiresRestart: true,
|
||||
},
|
||||
{
|
||||
Name: "alertingSyncDispatchTimer",
|
||||
Description: "Use synchronized dispatch timer to minimize duplicate notifications across alertmanager HA pods",
|
||||
Stage: FeatureStageExperimental,
|
||||
Owner: grafanaAlertingSquad,
|
||||
RequiresRestart: true,
|
||||
HideFromDocs: true,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
1
pkg/services/featuremgmt/toggles_gen.csv
generated
1
pkg/services/featuremgmt/toggles_gen.csv
generated
@@ -269,3 +269,4 @@ ttlPluginInstanceManager,experimental,@grafana/plugins-platform-backend,false,fa
|
||||
lokiQueryLimitsContext,experimental,@grafana/observability-logs,false,false,true
|
||||
rudderstackUpgrade,experimental,@grafana/grafana-frontend-platform,false,false,true
|
||||
kubernetesAlertingHistorian,experimental,@grafana/alerting-squad,false,true,false
|
||||
alertingSyncDispatchTimer,experimental,@grafana/alerting-squad,false,true,false
|
||||
|
||||
|
4
pkg/services/featuremgmt/toggles_gen.go
generated
4
pkg/services/featuremgmt/toggles_gen.go
generated
@@ -765,4 +765,8 @@ const (
|
||||
// FlagKubernetesAlertingHistorian
|
||||
// Adds support for Kubernetes alerting historian APIs
|
||||
FlagKubernetesAlertingHistorian = "kubernetesAlertingHistorian"
|
||||
|
||||
// FlagAlertingSyncDispatchTimer
|
||||
// Use synchronized dispatch timer to minimize duplicate notifications across alertmanager HA pods
|
||||
FlagAlertingSyncDispatchTimer = "alertingSyncDispatchTimer"
|
||||
)
|
||||
|
||||
20
pkg/services/featuremgmt/toggles_gen.json
generated
20
pkg/services/featuremgmt/toggles_gen.json
generated
@@ -498,6 +498,24 @@
|
||||
"codeowner": "@grafana/alerting-squad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "alertingSyncDispatchTimer",
|
||||
"resourceVersion": "1764342211090",
|
||||
"creationTimestamp": "2025-11-19T19:22:33Z",
|
||||
"deletionTimestamp": "2025-11-27T17:14:53Z",
|
||||
"annotations": {
|
||||
"grafana.app/updatedTimestamp": "2025-11-28 15:03:31.090159 +0000 UTC"
|
||||
}
|
||||
},
|
||||
"spec": {
|
||||
"description": "Use synchronized dispatch timer to minimize duplicate notifications across alertmanager HA pods",
|
||||
"stage": "experimental",
|
||||
"codeowner": "@grafana/alerting-squad",
|
||||
"requiresRestart": true,
|
||||
"hideFromDocs": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "alertingTriage",
|
||||
@@ -3646,4 +3664,4 @@
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -222,6 +222,7 @@ func (ng *AlertNG) init() error {
|
||||
ExternalURL: ng.Cfg.AppURL,
|
||||
SmtpConfig: smtpCfg,
|
||||
Timeout: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Timeout,
|
||||
DispatchTimer: notifier.GetDispatchTimer(ng.FeatureToggles).String(),
|
||||
}
|
||||
autogenFn := func(ctx context.Context, logger log.Logger, orgID int64, cfg *definitions.PostableApiAlertingConfig, invalidReceiverAction notifier.InvalidReceiversAction) error {
|
||||
return notifier.AddAutogenConfig(ctx, logger, ng.store, orgID, cfg, invalidReceiverAction, ng.FeatureToggles)
|
||||
|
||||
@@ -32,6 +32,9 @@ const (
|
||||
|
||||
// How long we keep silences in the kvstore after they've expired.
|
||||
silenceRetention = 5 * 24 * time.Hour
|
||||
|
||||
// How long we keep flushes in the kvstore after they've expired.
|
||||
flushRetention = 5 * 24 * time.Hour
|
||||
)
|
||||
|
||||
type AlertingStore interface {
|
||||
@@ -43,8 +46,10 @@ type AlertingStore interface {
|
||||
type stateStore interface {
|
||||
SaveSilences(ctx context.Context, st alertingNotify.State) (int64, error)
|
||||
SaveNotificationLog(ctx context.Context, st alertingNotify.State) (int64, error)
|
||||
SaveFlushLog(ctx context.Context, st alertingNotify.State) (int64, error)
|
||||
GetSilences(ctx context.Context) (string, error)
|
||||
GetNotificationLog(ctx context.Context) (string, error)
|
||||
GetFlushLog(ctx context.Context) (string, error)
|
||||
}
|
||||
|
||||
type alertmanager struct {
|
||||
@@ -99,6 +104,10 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
flushLog, err := stateStore.GetFlushLog(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
silencesOptions := maintenanceOptions{
|
||||
initialState: silences,
|
||||
@@ -121,12 +130,29 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
|
||||
}
|
||||
l := log.New("ngalert.notifier")
|
||||
|
||||
dispatchTimer := GetDispatchTimer(featureToggles)
|
||||
|
||||
var flushLogOptions maintenanceOptions
|
||||
if dispatchTimer == alertingNotify.DispatchTimerSync {
|
||||
flushLogOptions = maintenanceOptions{
|
||||
initialState: flushLog,
|
||||
retention: flushRetention,
|
||||
maintenanceFrequency: maintenanceInterval,
|
||||
maintenanceFunc: func(state alertingNotify.State) (int64, error) {
|
||||
// Detached context here is to make sure that when the service is shut down the persist operation is executed.
|
||||
return stateStore.SaveFlushLog(context.Background(), state)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
opts := alertingNotify.GrafanaAlertmanagerOpts{
|
||||
ExternalURL: cfg.AppURL,
|
||||
AlertStoreCallback: nil,
|
||||
PeerTimeout: cfg.UnifiedAlerting.HAPeerTimeout,
|
||||
Silences: silencesOptions,
|
||||
Nflog: nflogOptions,
|
||||
FlushLog: flushLogOptions,
|
||||
DispatchTimer: dispatchTimer,
|
||||
Limits: alertingNotify.Limits{
|
||||
MaxSilences: cfg.UnifiedAlerting.AlertmanagerMaxSilencesCount,
|
||||
MaxSilenceSizeBytes: cfg.UnifiedAlerting.AlertmanagerMaxSilenceSizeBytes,
|
||||
|
||||
19
pkg/services/ngalert/notifier/dispatch_timer.go
Normal file
19
pkg/services/ngalert/notifier/dispatch_timer.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
alertingNotify "github.com/grafana/alerting/notify"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
||||
)
|
||||
|
||||
// GetDispatchTimer returns the appropriate dispatch timer based on feature toggles.
|
||||
func GetDispatchTimer(features featuremgmt.FeatureToggles) (dt alertingNotify.DispatchTimer) {
|
||||
//nolint:staticcheck // not yet migrated to OpenFeature
|
||||
enabled := features.IsEnabledGlobally(featuremgmt.FlagAlertingSyncDispatchTimer)
|
||||
log.New("ngalert.dispatchTimer").Info("GetDispatchTimer called", "enabled", enabled, "result", dt.String())
|
||||
|
||||
if enabled {
|
||||
dt = alertingNotify.DispatchTimerSync
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -15,6 +15,7 @@ const (
|
||||
KVNamespace = "alertmanager"
|
||||
NotificationLogFilename = "notifications"
|
||||
SilencesFilename = "silences"
|
||||
FlushLogFilename = "flushes"
|
||||
)
|
||||
|
||||
// FileStore is in charge of persisting the alertmanager files to the database.
|
||||
@@ -42,6 +43,10 @@ func (fileStore *FileStore) GetNotificationLog(ctx context.Context) (string, err
|
||||
return fileStore.contentFor(ctx, NotificationLogFilename)
|
||||
}
|
||||
|
||||
func (fileStore *FileStore) GetFlushLog(ctx context.Context) (string, error) {
|
||||
return fileStore.contentFor(ctx, FlushLogFilename)
|
||||
}
|
||||
|
||||
// contentFor returns the content for the given Alertmanager kvstore key.
|
||||
func (fileStore *FileStore) contentFor(ctx context.Context, filename string) (string, error) {
|
||||
// Then, let's attempt to read it from the database.
|
||||
@@ -74,6 +79,11 @@ func (fileStore *FileStore) SaveNotificationLog(ctx context.Context, st alerting
|
||||
return fileStore.persist(ctx, NotificationLogFilename, st)
|
||||
}
|
||||
|
||||
// SaveFlushLog saves the flush log to the database and returns the size of the unencoded state.
|
||||
func (fileStore *FileStore) SaveFlushLog(ctx context.Context, st alertingNotify.State) (int64, error) {
|
||||
return fileStore.persist(ctx, FlushLogFilename, st)
|
||||
}
|
||||
|
||||
// persist takes care of persisting the binary representation of internal state to the database as a base64 encoded string.
|
||||
func (fileStore *FileStore) persist(ctx context.Context, filename string, st alertingNotify.State) (int64, error) {
|
||||
var size int64
|
||||
|
||||
@@ -5,5 +5,9 @@ func (am *alertmanager) MergeState(state ExternalState) error {
|
||||
if err := am.Base.MergeNflog(state.Nflog); err != nil {
|
||||
return err
|
||||
}
|
||||
return am.Base.MergeSilences(state.Silences)
|
||||
if err := am.Base.MergeSilences(state.Silences); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
// return am.Base.MergeFlushLog(state.FlushLog)
|
||||
}
|
||||
|
||||
@@ -84,6 +84,8 @@ type Alertmanager struct {
|
||||
|
||||
promoteConfig bool
|
||||
externalURL string
|
||||
|
||||
dispatchTimer string
|
||||
}
|
||||
|
||||
type AlertmanagerConfig struct {
|
||||
@@ -109,6 +111,9 @@ type AlertmanagerConfig struct {
|
||||
|
||||
// Timeout for the HTTP client.
|
||||
Timeout time.Duration
|
||||
|
||||
// DispatchTimer specifies which timer to use in the dispatcher
|
||||
DispatchTimer string
|
||||
}
|
||||
|
||||
func (cfg *AlertmanagerConfig) Validate() error {
|
||||
@@ -201,6 +206,7 @@ func NewAlertmanager(ctx context.Context, cfg AlertmanagerConfig, store stateSto
|
||||
externalURL: cfg.ExternalURL,
|
||||
promoteConfig: cfg.PromoteConfig,
|
||||
smtp: cfg.SmtpConfig,
|
||||
dispatchTimer: cfg.DispatchTimer,
|
||||
}
|
||||
|
||||
// Parse the default configuration once and remember its hash so we can compare it later.
|
||||
@@ -343,10 +349,11 @@ func (am *Alertmanager) buildConfiguration(ctx context.Context, raw []byte, crea
|
||||
AlertmanagerConfig: mergeResult.Config,
|
||||
Templates: templates,
|
||||
},
|
||||
CreatedAt: createdAtEpoch,
|
||||
Promoted: am.promoteConfig,
|
||||
ExternalURL: am.externalURL,
|
||||
SmtpConfig: am.smtp,
|
||||
CreatedAt: createdAtEpoch,
|
||||
Promoted: am.promoteConfig,
|
||||
ExternalURL: am.externalURL,
|
||||
SmtpConfig: am.smtp,
|
||||
DispatchTimer: am.dispatchTimer,
|
||||
}
|
||||
|
||||
cfgHash, err := calculateUserGrafanaConfigHash(payload)
|
||||
|
||||
@@ -37,6 +37,7 @@ type UserGrafanaConfig struct {
|
||||
Promoted bool `json:"promoted"`
|
||||
ExternalURL string `json:"external_url"`
|
||||
SmtpConfig SmtpConfig `json:"smtp_config"`
|
||||
DispatchTimer string `json:"dispatch_timer"`
|
||||
}
|
||||
|
||||
func (mc *Mimir) GetGrafanaAlertmanagerConfig(ctx context.Context) (*UserGrafanaConfig, error) {
|
||||
|
||||
Reference in New Issue
Block a user