Compare commits

...

4 Commits

Author SHA1 Message Date
maicon
d68451b70e provisioning: acquire server lock before provisioning dashboards+folders (#114488)
* provisioning: acquire server lock before provisioning dashboards+folders

Signed-off-by: Maicon Costa <maiconscosta@gmail.com>

---------

Signed-off-by: Maicon Costa <maiconscosta@gmail.com>
2025-12-05 12:59:22 -03:00
Stephanie Hingtgen
2be78f8e63 Dashboards: Prevent panic in validation (#114436) 2025-12-05 12:57:49 -03:00
Renato Costa
076d378adc fix: update search request for existing provisioned dashboards in modes 3+ (#114412)
Fix search for existing provisioned dashboards in modes 3+

The search query was not requesting the dashboard's "legacy ID". As a result,
the provisioning process would not find existing provisioned dashboards, making
copies of these dashboards every time there was a change in the provisioned
dashboard's definition.

Signed-off-by: Maicon Costa <maiconscosta@gmail.com>
2025-12-05 12:32:30 -03:00
Daniele Stefano Ferru
6b854bc57d Hotfix: Granting None role Viewer access for a fixed API group list (#114310) 2025-12-04 12:35:23 -05:00
11 changed files with 127 additions and 23 deletions

View File

@@ -66,6 +66,9 @@ func ValidateDashboardSpec(obj *Dashboard, forceValidation bool) (field.ErrorLis
}
// formatErrorPath renders a validation error path as a dot-separated string.
//
// Paths with more than four segments have their first four segments dropped
// before joining; shorter paths are joined in full.
func formatErrorPath(path []string) string {
	// Number of leading segments that make up the omitted prefix.
	const prefixLen = 4

	segments := path
	if len(segments) > prefixLen {
		// omitting the "lineage.schemas[0].schema.spec" prefix here.
		segments = segments[prefixLen:]
	}
	return strings.Join(segments, ".")
}

View File

@@ -67,6 +67,9 @@ func ValidateDashboardSpec(obj *Dashboard, forceValidation bool) (field.ErrorLis
}
// formatErrorPath turns the segments of a validation error path into a single
// dot-separated string.
//
// When the path holds more than four segments, the leading four are skipped;
// otherwise every segment is kept.
func formatErrorPath(path []string) string {
	// Count of leading segments treated as the prefix to omit.
	const skip = 4

	start := 0
	if len(path) > skip {
		// omitting the "lineage.schemas[0].schema.spec" prefix here.
		start = skip
	}
	return strings.Join(path[start:], ".")
}

View File

@@ -15,7 +15,6 @@ import (
_ "github.com/blugelabs/bluge"
_ "github.com/blugelabs/bluge_segment_api"
_ "github.com/crewjam/saml"
_ "github.com/docker/go-connections/nat"
_ "github.com/go-jose/go-jose/v4"
_ "github.com/gobwas/glob"
_ "github.com/googleapis/gax-go/v2"
@@ -31,7 +30,6 @@ import (
_ "github.com/spf13/cobra" // used by the standalone apiserver cli
_ "github.com/spyzhov/ajson"
_ "github.com/stretchr/testify/require"
_ "github.com/testcontainers/testcontainers-go"
_ "gocloud.dev/secrets/awskms"
_ "gocloud.dev/secrets/azurekeyvault"
_ "gocloud.dev/secrets/gcpkms"
@@ -56,7 +54,9 @@ import (
_ "github.com/grafana/e2e"
_ "github.com/grafana/gofpdf"
_ "github.com/grafana/gomemcache/memcache"
_ "github.com/grafana/tempo/pkg/traceql"
_ "github.com/grafana/grafana/apps/alerting/alertenrichment/pkg/apis/alertenrichment/v1beta1"
_ "github.com/grafana/grafana/apps/scope/pkg/apis/scope/v0alpha1"
_ "github.com/grafana/tempo/pkg/traceql"
_ "github.com/testcontainers/testcontainers-go"
)

View File

@@ -666,7 +666,7 @@ func Initialize(ctx context.Context, cfg *setting.Cfg, opts Options, apiOpts api
azurePromMigrationService := promtypemigration.ProvideAzurePromMigrationService(service15, inMemory, repoManager, pluginInstaller, cfg)
amazonPromMigrationService := promtypemigration.ProvideAmazonPromMigrationService(service15, inMemory, repoManager, pluginInstaller, cfg)
promTypeMigrationProviderImpl := promtypemigration.ProvidePromTypeMigrationProvider(serverLockService, featureToggles, azurePromMigrationService, amazonPromMigrationService)
provisioningServiceImpl, err := provisioning.ProvideService(accessControl, cfg, sqlStore, pluginstoreService, dBstore, serviceService, notificationService, dashboardProvisioningService, service15, correlationsService, dashboardService, folderimplService, service13, searchService, quotaService, secretsService, orgService, receiverPermissionsService, tracingService, dualwriteService, promTypeMigrationProviderImpl)
provisioningServiceImpl, err := provisioning.ProvideService(accessControl, cfg, sqlStore, pluginstoreService, dBstore, serviceService, notificationService, dashboardProvisioningService, service15, correlationsService, dashboardService, folderimplService, service13, searchService, quotaService, secretsService, orgService, receiverPermissionsService, tracingService, dualwriteService, promTypeMigrationProviderImpl, serverLockService)
if err != nil {
return nil, err
}
@@ -1309,7 +1309,7 @@ func InitializeForTest(ctx context.Context, t sqlutil.ITestDB, testingT interfac
azurePromMigrationService := promtypemigration.ProvideAzurePromMigrationService(service15, inMemory, repoManager, pluginInstaller, cfg)
amazonPromMigrationService := promtypemigration.ProvideAmazonPromMigrationService(service15, inMemory, repoManager, pluginInstaller, cfg)
promTypeMigrationProviderImpl := promtypemigration.ProvidePromTypeMigrationProvider(serverLockService, featureToggles, azurePromMigrationService, amazonPromMigrationService)
provisioningServiceImpl, err := provisioning.ProvideService(accessControl, cfg, sqlStore, pluginstoreService, dBstore, serviceService, notificationService, dashboardProvisioningService, service15, correlationsService, dashboardService, folderimplService, service13, searchService, quotaService, secretsService, orgService, receiverPermissionsService, tracingService, dualwriteService, promTypeMigrationProviderImpl)
provisioningServiceImpl, err := provisioning.ProvideService(accessControl, cfg, sqlStore, pluginstoreService, dBstore, serviceService, notificationService, dashboardProvisioningService, service15, correlationsService, dashboardService, folderimplService, service13, searchService, quotaService, secretsService, orgService, receiverPermissionsService, tracingService, dualwriteService, promTypeMigrationProviderImpl, serverLockService)
if err != nil {
return nil, err
}

View File

@@ -3,6 +3,7 @@ package authorizer
import (
"context"
"fmt"
"slices"
"k8s.io/apiserver/pkg/authorization/authorizer"
@@ -12,6 +13,10 @@ import (
var _ authorizer.Authorizer = &roleAuthorizer{}
// orgRoleNoneAsViewerAPIGroups lists the API groups for which Authorize allows
// the read-only verbs (get, list, watch) to callers whose org role is None.
// Any other verb on these groups, and every verb on groups not listed here,
// is still denied for the None role.
var orgRoleNoneAsViewerAPIGroups = []string{
"productactivation.ext.grafana.com",
}
type roleAuthorizer struct{}
func newRoleAuthorizer() *roleAuthorizer {
@@ -43,6 +48,16 @@ func (auth roleAuthorizer) Authorize(ctx context.Context, a authorizer.Attribute
return authorizer.DecisionDeny, errorMessageForGrafanaOrgRole(orgRole, a), nil
}
case org.RoleNone:
// HOTFIX: granting Viewer actions to None roles to a fixed group of APIs,
// while we work on a proper fix.
if slices.Contains(orgRoleNoneAsViewerAPIGroups, a.GetAPIGroup()) {
switch a.GetVerb() {
case "get", "list", "watch":
return authorizer.DecisionAllow, "", nil
default:
return authorizer.DecisionDeny, errorMessageForGrafanaOrgRole(orgRole, a), nil
}
}
return authorizer.DecisionDeny, errorMessageForGrafanaOrgRole(orgRole, a), nil
}
return authorizer.DecisionDeny, "", nil

View File

@@ -2009,7 +2009,14 @@ func (dr *DashboardServiceImpl) searchDashboardsThroughK8sRaw(ctx context.Contex
request.Limit = query.Limit
request.Page = query.Page
request.Offset = (query.Page - 1) * query.Limit // only relevant when running in modes 3+
request.Fields = dashboardsearch.IncludeFields
request.Fields = append(
dashboardsearch.IncludeFields,
// Include the dashboard legacy ID in the results, as it is needed when
// determining whether a provisioned dashboard exists or not, see
// `(*DashboardServiceImpl).searchProvisionedDashboardsThroughK8s`.
resource.SEARCH_FIELD_LEGACY_ID,
resource.SEARCH_FIELD_LABELS+"."+resource.SEARCH_FIELD_LEGACY_ID,
)
namespace := dr.k8sclient.GetNamespace(query.OrgId)
var err error

View File

@@ -2016,6 +2016,26 @@ func TestSearchDashboardsThroughK8sRaw(t *testing.T) {
_, err := service.searchDashboardsThroughK8s(ctx, query)
require.NoError(t, err)
})
t.Run("search will request legacy dashboard ID", func(t *testing.T) {
ctx := context.Background()
k8sCliMock := new(client.MockK8sHandler)
service := &DashboardServiceImpl{k8sclient: k8sCliMock}
query := &dashboards.FindPersistedDashboardsQuery{
ManagedBy: utils.ManagerKindClassicFP, //nolint:staticcheck
OrgId: 1,
}
k8sCliMock.On("GetNamespace", mock.Anything, mock.Anything).Return("default")
k8sCliMock.On("Search", mock.Anything, mock.Anything, mock.MatchedBy(func(req *resourcepb.ResourceSearchRequest) bool {
return slices.Contains(req.Fields, "grafana.app/deprecatedInternalID") &&
slices.Contains(req.Fields, "labels.grafana.app/deprecatedInternalID")
})).Return(&resourcepb.ResourceSearchResponse{
Results: &resourcepb.ResourceTable{},
TotalHits: 0,
}, nil)
_, err := service.searchDashboardsThroughK8s(ctx, query)
require.NoError(t, err)
})
}
func TestSearchProvisionedDashboardsThroughK8sRaw(t *testing.T) {

View File

@@ -2,6 +2,7 @@ package dashboards
import (
"context"
"errors"
"fmt"
"os"
"time"
@@ -9,10 +10,12 @@ import (
dashboardV1 "github.com/grafana/grafana/apps/dashboard/pkg/apis/dashboard/v1beta1"
folderV1 "github.com/grafana/grafana/apps/folder/pkg/apis/folder/v1beta1"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/serverlock"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/folder"
"github.com/grafana/grafana/pkg/services/org"
"github.com/grafana/grafana/pkg/services/provisioning/utils"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/storage/legacysql/dualwrite"
)
@@ -28,7 +31,7 @@ type DashboardProvisioner interface {
}
// DashboardProvisionerFactory creates DashboardProvisioners based on input
type DashboardProvisionerFactory func(context.Context, string, dashboards.DashboardProvisioningService, org.Service, utils.DashboardStore, folder.Service, dualwrite.Service) (DashboardProvisioner, error)
type DashboardProvisionerFactory func(context.Context, string, dashboards.DashboardProvisioningService, *setting.Cfg, org.Service, utils.DashboardStore, folder.Service, dualwrite.Service, *serverlock.ServerLockService) (DashboardProvisioner, error)
// Provisioner is responsible for syncing dashboard from disk to Grafana's database.
type Provisioner struct {
@@ -38,6 +41,8 @@ type Provisioner struct {
duplicateValidator duplicateValidator
provisioner dashboards.DashboardProvisioningService
dual dualwrite.Service
serverLock *serverlock.ServerLockService
cfg *setting.Cfg
}
func (provider *Provisioner) HasDashboardSources() bool {
@@ -45,7 +50,7 @@ func (provider *Provisioner) HasDashboardSources() bool {
}
// New returns a new DashboardProvisioner
func New(ctx context.Context, configDirectory string, provisioner dashboards.DashboardProvisioningService, orgService org.Service, dashboardStore utils.DashboardStore, folderService folder.Service, dual dualwrite.Service) (DashboardProvisioner, error) {
func New(ctx context.Context, configDirectory string, provisioner dashboards.DashboardProvisioningService, cfg *setting.Cfg, orgService org.Service, dashboardStore utils.DashboardStore, folderService folder.Service, dual dualwrite.Service, serverLockService *serverlock.ServerLockService) (DashboardProvisioner, error) {
logger := log.New("provisioning.dashboard")
cfgReader := &configReader{path: configDirectory, log: logger, orgExists: utils.NewOrgExistsChecker(orgService)}
configs, err := cfgReader.readConfig(ctx)
@@ -78,6 +83,8 @@ func New(ctx context.Context, configDirectory string, provisioner dashboards.Das
duplicateValidator: newDuplicateValidator(logger, fileReaders),
provisioner: provisioner,
dual: dual,
serverLock: serverLockService,
cfg: cfg,
}
return d, nil
@@ -95,23 +102,53 @@ func (provider *Provisioner) Provision(ctx context.Context) error {
}
}
provider.log.Info("starting to provision dashboards")
var errProvisioning error
for _, reader := range provider.fileReaders {
if err := reader.walkDisk(ctx); err != nil {
if os.IsNotExist(err) {
// don't stop the provisioning service in case the folder is missing. The folder can appear after the startup
provider.log.Warn("Failed to provision config", "name", reader.Cfg.Name, "error", err)
return nil
}
return fmt.Errorf("failed to provision config %v: %w", reader.Cfg.Name, err)
// retry obtaining the lock for 20 attempts
retryOpt := func(attempts int) error {
if attempts < 20 {
return nil
}
return errors.New("retries exhausted")
}
provider.duplicateValidator.validate()
provider.log.Info("finished to provision dashboards")
return nil
lockTimeConfig := serverlock.LockTimeConfig{
// if a replica crashes while holding the lock, other replicas can obtain the
// lock after this duration (15s default value, might be configured via config file)
MaxInterval: time.Duration(provider.cfg.ClassicProvisioningDashboardsServerLockMaxIntervalSeconds) * time.Second,
// wait between 100ms and 1s before retrying to obtain the lock (default values, might be configured via config file)
MinWait: time.Duration(provider.cfg.ClassicProvisioningDashboardsServerLockMinWaitMs) * time.Millisecond,
MaxWait: time.Duration(provider.cfg.ClassicProvisioningDashboardsServerLockMaxWaitMs) * time.Millisecond,
}
// this means that if we fail to obtain the lock after ~10 seconds, we return an error
lockErr := provider.serverLock.LockExecuteAndReleaseWithRetries(ctx, "provisioning_dashboards", lockTimeConfig, func(ctx context.Context) {
provider.log.Info("starting to provision dashboards")
for _, reader := range provider.fileReaders {
if err := reader.walkDisk(ctx); err != nil {
if os.IsNotExist(err) {
// don't stop the provisioning service in case the folder is missing. The folder can appear after the startup
provider.log.Warn("Failed to provision config", "name", reader.Cfg.Name, "error", err)
return
}
errProvisioning = fmt.Errorf("failed to provision config %v: %w", reader.Cfg.Name, err)
return
}
}
provider.duplicateValidator.validate()
provider.log.Info("finished to provision dashboards")
}, retryOpt)
if lockErr != nil {
provider.log.Error("Failed to obtain dashboard provisioning lock", "error", lockErr)
return lockErr
}
return errProvisioning
}
// CleanUpOrphanedDashboards deletes provisioned dashboards missing a linked reader.

View File

@@ -10,6 +10,7 @@ import (
"github.com/grafana/dskit/services"
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/serverlock"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/registry"
"github.com/grafana/grafana/pkg/services/accesscontrol"
@@ -64,6 +65,7 @@ func ProvideService(
tracer tracing.Tracer,
dual dualwrite.Service,
promTypeMigrationProvider promtypemigration.PromTypeMigrationProvider,
serverLockService *serverlock.ServerLockService,
) (*ProvisioningServiceImpl, error) {
s := &ProvisioningServiceImpl{
Cfg: cfg,
@@ -92,6 +94,7 @@ func ProvideService(
tracer: tracer,
migratePrometheusType: promTypeMigrationProvider.Run,
dual: dual,
serverLock: serverLockService,
}
s.NamedService = services.NewBasicService(s.starting, s.running, nil).WithName(ServiceName)
@@ -166,7 +169,7 @@ func (ps *ProvisioningServiceImpl) running(ctx context.Context) error {
func (ps *ProvisioningServiceImpl) setDashboardProvisioner() error {
dashboardPath := filepath.Join(ps.Cfg.ProvisioningPath, "dashboards")
dashProvisioner, err := ps.newDashboardProvisioner(context.Background(), dashboardPath, ps.dashboardProvisioningService, ps.orgService, ps.dashboardService, ps.folderService, ps.dual)
dashProvisioner, err := ps.newDashboardProvisioner(context.Background(), dashboardPath, ps.dashboardProvisioningService, ps.Cfg, ps.orgService, ps.dashboardService, ps.folderService, ps.dual, ps.serverLock)
if err != nil {
return fmt.Errorf("%v: %w", "Failed to create provisioner", err)
}
@@ -242,6 +245,7 @@ type ProvisioningServiceImpl struct {
resourcePermissions accesscontrol.ReceiverPermissionsService
tracer tracing.Tracer
dual dualwrite.Service
serverLock *serverlock.ServerLockService
migratePrometheusType func(context.Context) error
}

View File

@@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/serverlock"
dashboardstore "github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/folder"
"github.com/grafana/grafana/pkg/services/org"
@@ -20,6 +21,7 @@ import (
"github.com/grafana/grafana/pkg/services/provisioning/datasources"
"github.com/grafana/grafana/pkg/services/provisioning/utils"
"github.com/grafana/grafana/pkg/services/searchV2"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/storage/legacysql/dualwrite"
)
@@ -160,7 +162,7 @@ func setup(t *testing.T) *serviceTestStruct {
searchStub := searchV2.NewStubSearchService()
service, err := newProvisioningServiceImpl(
func(context.Context, string, dashboardstore.DashboardProvisioningService, org.Service, utils.DashboardStore, folder.Service, dualwrite.Service) (dashboards.DashboardProvisioner, error) {
func(context.Context, string, dashboardstore.DashboardProvisioningService, *setting.Cfg, org.Service, utils.DashboardStore, folder.Service, dualwrite.Service, *serverlock.ServerLockService) (dashboards.DashboardProvisioner, error) {
serviceTest.dashboardProvisionerInstantiations++
return serviceTest.mock, nil
},

View File

@@ -150,6 +150,11 @@ type Cfg struct {
PluginsPath string
EnterpriseLicensePath string
// Classic Provisioning settings
ClassicProvisioningDashboardsServerLockMaxIntervalSeconds int64
ClassicProvisioningDashboardsServerLockMinWaitMs int64
ClassicProvisioningDashboardsServerLockMaxWaitMs int64
// SMTP email settings
Smtp SmtpSettings
@@ -1221,6 +1226,8 @@ func (cfg *Cfg) parseINIFile(iniFile *ini.File) error {
return err
}
cfg.readClassicProvisioningSettings(iniFile)
// read dashboard settings
dashboards := iniFile.Section("dashboards")
cfg.DashboardVersionsToKeep = dashboards.Key("versions_to_keep").MustInt(20)
@@ -2107,6 +2114,12 @@ func (cfg *Cfg) readLiveSettings(iniFile *ini.File) error {
return nil
}
// readClassicProvisioningSettings loads the dashboard-provisioning server lock
// settings from the [classic_provisioning] section of the INI file.
//
// Keys read and the fallback passed to MustInt64 for each:
//   - dashboards_server_lock_min_wait_ms (100)
//   - dashboards_server_lock_max_wait_ms (1000)
//   - dashboards_server_lock_max_interval_seconds (15)
func (cfg *Cfg) readClassicProvisioningSettings(iniFile *ini.File) {
	section := iniFile.Section("classic_provisioning")

	minWaitMs := section.Key("dashboards_server_lock_min_wait_ms").MustInt64(100)
	maxWaitMs := section.Key("dashboards_server_lock_max_wait_ms").MustInt64(1000)
	maxIntervalSeconds := section.Key("dashboards_server_lock_max_interval_seconds").MustInt64(15)

	cfg.ClassicProvisioningDashboardsServerLockMinWaitMs = minWaitMs
	cfg.ClassicProvisioningDashboardsServerLockMaxWaitMs = maxWaitMs
	cfg.ClassicProvisioningDashboardsServerLockMaxIntervalSeconds = maxIntervalSeconds
}
func (cfg *Cfg) readProvisioningSettings(iniFile *ini.File) error {
provisioning := valueAsString(iniFile.Section("paths"), "provisioning", "")
cfg.ProvisioningPath = makeAbsolute(provisioning, cfg.HomePath)