From b2651f39b6dbe79e05d42f8a3bfbbc11c42c5d4c Mon Sep 17 00:00:00 2001 From: Dileep Ranganathan Date: Thu, 5 Sep 2019 21:46:59 -0700 Subject: Fix issue with concurrent CR creation The collectd operator went into a deadlock when concurrent update operations within the same controller tried to update the resource. Fixed this by adding a Mutex. Deleted the old build_image.sh, which is replaced by a new script that builds and pushes from inside a builder Docker container. This helps in tackling the dependency issues for the image build. Updated the README for the build image script usage. Issue-ID: ONAPARC-461 Signed-off-by: Dileep Ranganathan Change-Id: Ib3c2d1edd266e70bb713885de7ad046ebf5ad086 --- .../collectdplugin/collectdplugin_controller.go | 59 +++++++++++----------- 1 file changed, 30 insertions(+), 29 deletions(-) (limited to 'vnfs/DAaaS/microservices/collectd-operator/pkg/controller/collectdplugin/collectdplugin_controller.go') diff --git a/vnfs/DAaaS/microservices/collectd-operator/pkg/controller/collectdplugin/collectdplugin_controller.go b/vnfs/DAaaS/microservices/collectd-operator/pkg/controller/collectdplugin/collectdplugin_controller.go index 9329c55e..644a6bb3 100644 --- a/vnfs/DAaaS/microservices/collectd-operator/pkg/controller/collectdplugin/collectdplugin_controller.go +++ b/vnfs/DAaaS/microservices/collectd-operator/pkg/controller/collectdplugin/collectdplugin_controller.go @@ -5,6 +5,7 @@ import ( "fmt" "reflect" "strings" + "sync" "github.com/go-logr/logr" "github.com/operator-framework/operator-sdk/pkg/predicate" @@ -28,6 +29,8 @@ import ( var log = logf.Log.WithName("controller_collectdplugin") +var reconcileLock sync.Mutex + // Add creates a new CollectdPlugin Controller and adds it to the Manager. The Manager will set fields on the Controller // and Start it when the Manager is Started. 
func Add(mgr manager.Manager) error { @@ -142,45 +145,43 @@ func (r *ReconcileCollectdPlugin) Reconcile(request reconcile.Request) (reconcil } // Handle the reconciliation for CollectdPlugin. // At this stage the Status of the CollectdPlugin should NOT be "" + reconcileLock.Lock() err = r.handleCollectdPlugin(reqLogger, instance, false) + reconcileLock.Unlock() return reconcile.Result{}, err } // handleCollectdPlugin regenerates the collectd conf on CR Create, Update, Delete events func (r *ReconcileCollectdPlugin) handleCollectdPlugin(reqLogger logr.Logger, cr *onapv1alpha1.CollectdPlugin, isDelete bool) error { + var collectdConf string + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + cm, err := collectdutils.GetConfigMap(r.client, reqLogger, cr.Namespace) + if err != nil { + reqLogger.Error(err, "Skip reconcile: ConfigMap not found") + return err + } + reqLogger.V(1).Info(":::: ConfigMap Info ::::", "ConfigMap.Namespace", cm.Namespace, "ConfigMap.Name", cm.Name) - rmap, err := collectdutils.FindResourceMapForCR(r.client, reqLogger, cr.Namespace) - if err != nil { - reqLogger.Error(err, "Skip reconcile: Resources not found") - return err - } - - cm := rmap.ConfigMap - reqLogger.V(1).Info("Found ResourceMap") - reqLogger.V(1).Info(":::: ConfigMap Info ::::", "ConfigMap.Namespace", cm.Namespace, "ConfigMap.Name", cm.Name) + collectdConf, err := collectdutils.RebuildCollectdConf(r.client, cr.Namespace, isDelete, cr.Spec.PluginName) + if err != nil { + reqLogger.Error(err, "Skip reconcile: Rebuild conf failed") + return err + } - collectdConf, err := collectdutils.RebuildCollectdConf(r.client, cr.Namespace, isDelete, cr.Spec.PluginName) - if err != nil { - reqLogger.Error(err, "Skip reconcile: Rebuild conf failed") - return err - } + cm.SetAnnotations(map[string]string{ + "daaas-random": collectdutils.ComputeSHA256([]byte(collectdConf)), + }) + cm.Data["collectd.conf"] = collectdConf - cm.SetAnnotations(map[string]string{ - "daaas-random": 
collectdutils.ComputeSHA256([]byte(collectdConf)), + // Update the ConfigMap with new Spec and reload DaemonSets + reqLogger.Info("Updating the ConfigMap", "ConfigMap.Namespace", cm.Namespace, "ConfigMap.Name", cm.Name) + updateErr := r.client.Update(context.TODO(), cm) + return updateErr }) - - cm.Data["collectd.conf"] = collectdConf - - // Update the ConfigMap with new Spec and reload DaemonSets - reqLogger.Info("Updating the ConfigMap", "ConfigMap.Namespace", cm.Namespace, "ConfigMap.Name", cm.Name) - log.V(1).Info("ConfigMap Data", "Map: ", cm.Data) - err = r.client.Update(context.TODO(), cm) - if err != nil { - reqLogger.Error(err, "Update the ConfigMap failed", "ConfigMap.Namespace", cm.Namespace, "ConfigMap.Name", cm.Name) - return err + if retryErr != nil { + panic(fmt.Errorf("Update failed: %v", retryErr)) } - - retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + retryErr = retry.RetryOnConflict(retry.DefaultRetry, func() error { // Retrieve the latest version of Daemonset before attempting update // RetryOnConflict uses exponential backoff to avoid exhausting the apiserver // Select DaemonSets with label @@ -214,7 +215,7 @@ func (r *ReconcileCollectdPlugin) handleCollectdPlugin(reqLogger logr.Logger, cr panic(fmt.Errorf("Update failed: %v", retryErr)) } - err = r.updateStatus(cr) + err := r.updateStatus(cr) if err != nil { reqLogger.Error(err, "Unable to update status") return err -- cgit 1.2.3-korg