From 996e57b511f52e42571e8f549bde013973fc8da8 Mon Sep 17 00:00:00 2001 From: Maksym Trofimenko Date: Mon, 20 Nov 2023 20:31:22 +0000 Subject: [PATCH] Feature: GDPR compliant audit logs (#17396) --- src/common/const.go | 1 + src/controller/user/controller.go | 39 ++++++++- .../job/impl/gdpr/audit_logs_data_masking.go | 85 +++++++++++++++++++ .../impl/gdpr/audit_logs_data_masking_test.go | 67 +++++++++++++++ src/jobservice/job/known_jobs.go | 2 + src/jobservice/runtime/bootstrap.go | 13 +-- src/lib/config/metadata/metadatalist.go | 1 + src/lib/config/models/model.go | 1 + src/lib/config/userconfig.go | 1 + src/pkg/audit/dao/dao.go | 11 +++ src/pkg/audit/manager.go | 6 ++ src/pkg/user/manager.go | 17 ++-- src/pkg/user/manager_test.go | 6 +- src/testing/pkg/audit/dao/dao.go | 14 +++ src/testing/pkg/audit/manager.go | 14 +++ src/testing/pkg/user/manager.go | 14 +++ .../python/test_job_service_dashboard.py | 2 +- 17 files changed, 275 insertions(+), 19 deletions(-) create mode 100644 src/jobservice/job/impl/gdpr/audit_logs_data_masking.go create mode 100644 src/jobservice/job/impl/gdpr/audit_logs_data_masking_test.go diff --git a/src/common/const.go b/src/common/const.go index 2daf68299..a00601fcc 100644 --- a/src/common/const.go +++ b/src/common/const.go @@ -184,6 +184,7 @@ const ( TraceOtelTimeout = "trace_otel_timeout" GDPRDeleteUser = "gdpr_delete_user" + GDPRAuditLogs = "gdpr_audit_logs" // These variables are temporary solution for issue: https://github.com/goharbor/harbor/issues/16039 // When user disable the pull count/time/audit log, it will decrease the database access, especially in large concurrency pull scenarios. diff --git a/src/controller/user/controller.go b/src/controller/user/controller.go index 5025ff6af..1d0901206 100644 --- a/src/controller/user/controller.go +++ b/src/controller/user/controller.go @@ -21,12 +21,15 @@ import ( commonmodels "github.com/goharbor/harbor/src/common/models" "github.com/goharbor/harbor/src/common/security" "github.com/goharbor/harbor/src/common/security/local" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/job/impl/gdpr" "github.com/goharbor/harbor/src/lib" "github.com/goharbor/harbor/src/lib/config" "github.com/goharbor/harbor/src/lib/errors" "github.com/goharbor/harbor/src/lib/q" "github.com/goharbor/harbor/src/pkg/member" "github.com/goharbor/harbor/src/pkg/oidc" + "github.com/goharbor/harbor/src/pkg/task" "github.com/goharbor/harbor/src/pkg/user" "github.com/goharbor/harbor/src/pkg/user/models" ) @@ -76,6 +79,8 @@ func NewController() Controller { mgr: user.New(), oidcMetaMgr: oidc.NewMetaMgr(), memberMgr: member.Mgr, + taskMgr: task.NewManager(), + exeMgr: task.NewExecutionManager(), } } @@ -88,6 +93,8 @@ type controller struct { mgr user.Manager oidcMetaMgr oidc.MetaManager memberMgr member.Manager + taskMgr task.Manager + exeMgr task.ExecutionManager } func (c *controller) UpdateOIDCMeta(ctx context.Context, ou *commonmodels.OIDCUser, cols ...string) error { @@ -183,10 +190,36 @@ func (c *controller) Delete(ctx context.Context, id int) error { if err != nil { return errors.UnknownError(err).WithMessage("failed to load GDPR setting: %v", err) } - if gdprSetting.DeleteUser { - return c.mgr.DeleteGDPR(ctx, id) + + if gdprSetting.AuditLogs { + userDb, err := c.mgr.Get(ctx, id) + if err != nil { + return errors.Wrap(err, "unable to get user information") + } + params := map[string]interface{}{ + gdpr.UserNameParam: userDb.Username, + } + execID, err := c.exeMgr.Create(ctx, job.AuditLogsGDPRCompliantVendorType, -1, task.ExecutionTriggerEvent, params) + if err != nil { + return err + } + _, err = c.taskMgr.Create(ctx, execID, &task.Job{ + Name: job.AuditLogsGDPRCompliantVendorType, + Metadata: &job.Metadata{ + JobKind: job.KindGeneric, + }, + Parameters: params, + }) + if err != nil { + return err + } } - return c.mgr.Delete(ctx, id) + if gdprSetting.DeleteUser { + err = c.mgr.DeleteGDPR(ctx, id) + } else { + err = c.mgr.Delete(ctx, id) + } + return err } func (c *controller) List(ctx context.Context, query *q.Query, options ...models.Option) ([]*commonmodels.User, error) { diff --git a/src/jobservice/job/impl/gdpr/audit_logs_data_masking.go b/src/jobservice/job/impl/gdpr/audit_logs_data_masking.go new file mode 100644 index 000000000..2d6832e60 --- /dev/null +++ b/src/jobservice/job/impl/gdpr/audit_logs_data_masking.go @@ -0,0 +1,85 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gdpr + +import ( + "fmt" + + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/lib/errors" + "github.com/goharbor/harbor/src/pkg/audit" + "github.com/goharbor/harbor/src/pkg/user" +) + +const UserNameParam = "username" + +type AuditLogsDataMasking struct { + manager audit.Manager + userManager user.Manager +} + +func (a AuditLogsDataMasking) MaxFails() uint { + return 3 +} + +func (a AuditLogsDataMasking) MaxCurrency() uint { + return 1 +} + +func (a AuditLogsDataMasking) ShouldRetry() bool { + return true +} + +func (a AuditLogsDataMasking) Validate(params job.Parameters) error { + if params == nil { + // Params are required + return errors.New("missing job parameters") + } + _, err := a.parseParams(params) + return err +} + +func (a *AuditLogsDataMasking) init() { + if a.manager == nil { + a.manager = audit.New() + } + if a.userManager == nil { + a.userManager = user.New() + } +} + +func (a AuditLogsDataMasking) Run(ctx job.Context, params job.Parameters) error { + logger := ctx.GetLogger() + logger.Info("GDPR audit logs data masking job started") + a.init() + username, err := a.parseParams(params) + if err != nil { + return err + } + logger.Infof("Masking log entries for a user: %s", username) + return a.manager.UpdateUsername(ctx.SystemContext(), username, a.userManager.GenerateCheckSum(username)) +} + +func (a AuditLogsDataMasking) parseParams(params job.Parameters) (string, error) { + value, exist := params[UserNameParam] + if !exist { + return "", fmt.Errorf("param %s not found", UserNameParam) + } + str, ok := value.(string) + if !ok { + return "", fmt.Errorf("the value of %s isn't string", UserNameParam) + } + return str, nil +} diff --git a/src/jobservice/job/impl/gdpr/audit_logs_data_masking_test.go b/src/jobservice/job/impl/gdpr/audit_logs_data_masking_test.go new file mode 100644 index 000000000..d928a88cf --- /dev/null +++ b/src/jobservice/job/impl/gdpr/audit_logs_data_masking_test.go @@ -0,0 +1,67 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gdpr + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/goharbor/harbor/src/jobservice/job" + mockjobservice "github.com/goharbor/harbor/src/testing/jobservice" + "github.com/goharbor/harbor/src/testing/pkg/audit" + "github.com/goharbor/harbor/src/testing/pkg/user" +) + +func TestAuditLogsCleanupJobShouldRetry(t *testing.T) { + rep := &AuditLogsDataMasking{} + assert.True(t, rep.ShouldRetry()) +} + +func TestAuditLogsCleanupJobValidateParams(t *testing.T) { + const validUsername = "user" + var ( + manager = &audit.Manager{} + userManager = &user.Manager{} + ) + + rep := &AuditLogsDataMasking{ + manager: manager, + userManager: userManager, + } + err := rep.Validate(nil) + // parameters are required + assert.Error(t, err) + err = rep.Validate(job.Parameters{}) + // no required username parameter + assert.Error(t, err) + validParams := job.Parameters{ + "username": "user", + } + err = rep.Validate(validParams) + // parameters are valid + assert.Nil(t, err) + + ctx := &mockjobservice.MockJobContext{} + logger := &mockjobservice.MockJobLogger{} + + ctx.On("GetLogger").Return(logger) + userManager.On("GenerateCheckSum", validUsername).Return("hash") + manager.On("UpdateUsername", context.TODO(), validUsername, "hash").Return(nil) + + err = rep.Run(ctx, validParams) + assert.Nil(t, err) +} diff --git a/src/jobservice/job/known_jobs.go b/src/jobservice/job/known_jobs.go index 5ff43497c..5944decfa 100644 --- a/src/jobservice/job/known_jobs.go +++ b/src/jobservice/job/known_jobs.go @@ -44,6 +44,8 @@ const ( ExecSweepVendorType = "EXECUTION_SWEEP" // ScanAllVendorType: the name of the scan all job ScanAllVendorType = "SCAN_ALL" + // AuditLogsGDPRCompliantVendorType : the name of the job which makes audit logs table GDPR-compliant + AuditLogsGDPRCompliantVendorType = "AUDIT_LOGS_GDPR_COMPLIANT" ) var ( diff --git a/src/jobservice/runtime/bootstrap.go b/src/jobservice/runtime/bootstrap.go index aad3837bb..bd2aabd7c 100644 --- a/src/jobservice/runtime/bootstrap.go +++ b/src/jobservice/runtime/bootstrap.go @@ -24,6 +24,8 @@ import ( "syscall" "time" + "github.com/goharbor/harbor/src/jobservice/job/impl/gdpr" + "github.com/gomodule/redigo/redis" "github.com/goharbor/harbor/src/jobservice/api" @@ -327,11 +329,12 @@ func (bs *Bootstrap) loadAndRunRedisWorkerPool( // In v2.2 we migrate the scheduled replication, garbage collection and scan all to // the scheduler mechanism, the following three jobs are kept for the legacy jobs // and they can be removed after several releases - "IMAGE_REPLICATE": (*legacy.ReplicationScheduler)(nil), - "IMAGE_GC": (*legacy.GarbageCollectionScheduler)(nil), - "IMAGE_SCAN_ALL": (*legacy.ScanAllScheduler)(nil), - job.SystemArtifactCleanupVendorType: (*systemartifact.Cleanup)(nil), - job.ExecSweepVendorType: (*task.SweepJob)(nil), + "IMAGE_REPLICATE": (*legacy.ReplicationScheduler)(nil), + "IMAGE_GC": (*legacy.GarbageCollectionScheduler)(nil), + "IMAGE_SCAN_ALL": (*legacy.ScanAllScheduler)(nil), + job.SystemArtifactCleanupVendorType: (*systemartifact.Cleanup)(nil), + job.ExecSweepVendorType: (*task.SweepJob)(nil), + job.AuditLogsGDPRCompliantVendorType: (*gdpr.AuditLogsDataMasking)(nil), }); err != nil { // exit return nil, err diff --git a/src/lib/config/metadata/metadatalist.go b/src/lib/config/metadata/metadatalist.go index 535226bc8..f1ceb938b 100644 --- a/src/lib/config/metadata/metadatalist.go +++ b/src/lib/config/metadata/metadatalist.go @@ -181,6 +181,7 @@ var ( {Name: common.CacheExpireHours, Scope: SystemScope, Group: BasicGroup, EnvKey: "CACHE_EXPIRE_HOURS", DefaultValue: "24", ItemType: &IntType{}, Editable: false, Description: `The expire hours for cache`}, {Name: common.GDPRDeleteUser, Scope: SystemScope, Group: GDPRGroup, EnvKey: "GDPR_DELETE_USER", DefaultValue: "false", ItemType: &BoolType{}, Editable: false, Description: `The flag indicates if a user should be deleted compliant with GDPR.`}, + {Name: common.GDPRAuditLogs, Scope: SystemScope, Group: GDPRGroup, EnvKey: "GDPR_AUDIT_LOGS", DefaultValue: "false", ItemType: &BoolType{}, Editable: false, Description: `The flag indicates if an audit logs of a deleted user should be GDPR compliant.`}, {Name: common.AuditLogForwardEndpoint, Scope: UserScope, Group: BasicGroup, EnvKey: "AUDIT_LOG_FORWARD_ENDPOINT", DefaultValue: "", ItemType: &StringType{}, Editable: false, Description: `The endpoint to forward the audit log.`}, {Name: common.SkipAuditLogDatabase, Scope: UserScope, Group: BasicGroup, EnvKey: "SKIP_LOG_AUDIT_DATABASE", DefaultValue: "false", ItemType: &BoolType{}, Editable: false, Description: `The option to skip audit log in database`}, diff --git a/src/lib/config/models/model.go b/src/lib/config/models/model.go index 6b21fda80..51a9c0c2c 100644 --- a/src/lib/config/models/model.go +++ b/src/lib/config/models/model.go @@ -98,4 +98,5 @@ type GroupConf struct { type GDPRSetting struct { DeleteUser bool `json:"user_delete,omitempty"` + AuditLogs bool `json:"audit_logs"` } diff --git a/src/lib/config/userconfig.go b/src/lib/config/userconfig.go index e22b438f7..0192e6f58 100644 --- a/src/lib/config/userconfig.go +++ b/src/lib/config/userconfig.go @@ -186,6 +186,7 @@ func GDPRSetting(ctx context.Context) (*cfgModels.GDPRSetting, error) { } return &cfgModels.GDPRSetting{ DeleteUser: DefaultMgr().Get(ctx, common.GDPRDeleteUser).GetBool(), + AuditLogs: DefaultMgr().Get(ctx, common.GDPRAuditLogs).GetBool(), }, nil } diff --git a/src/pkg/audit/dao/dao.go b/src/pkg/audit/dao/dao.go index 019ffe6d5..3a816b6c4 100644 --- a/src/pkg/audit/dao/dao.go +++ b/src/pkg/audit/dao/dao.go @@ -42,6 +42,8 @@ type DAO interface { Delete(ctx context.Context, id int64) (err error) // Purge the audit log Purge(ctx context.Context, retentionHour int, includeOperations []string, dryRun bool) (int64, error) + // UpdateUsername replaces username in matched records + UpdateUsername(ctx context.Context, username string, usernameReplace string) error } // New returns an instance of the default DAO @@ -57,6 +59,15 @@ var allowedMaps = map[string]interface{}{ type dao struct{} +func (d *dao) UpdateUsername(ctx context.Context, username string, usernameReplace string) error { + o, err := orm.FromContext(ctx) + if err != nil { + return err + } + _, err = o.Raw("UPDATE audit_log SET username = ? WHERE username = ?", usernameReplace, username).Exec() + return err +} + // Purge delete expired audit log func (*dao) Purge(ctx context.Context, retentionHour int, includeOperations []string, dryRun bool) (int64, error) { ormer, err := orm.FromContext(ctx) diff --git a/src/pkg/audit/manager.go b/src/pkg/audit/manager.go index b81a89bfe..136d23ff7 100644 --- a/src/pkg/audit/manager.go +++ b/src/pkg/audit/manager.go @@ -40,6 +40,8 @@ type Manager interface { Delete(ctx context.Context, id int64) (err error) // Purge delete the audit log with retention hours Purge(ctx context.Context, retentionHour int, includeOperations []string, dryRun bool) (int64, error) + // UpdateUsername Replace all log records username with its hash + UpdateUsername(ctx context.Context, username string, replaceWith string) error } // New returns a default implementation of Manager @@ -53,6 +55,10 @@ type manager struct { dao dao.DAO } +func (m *manager) UpdateUsername(ctx context.Context, username string, replaceWith string) error { + return m.dao.UpdateUsername(ctx, username, replaceWith) +} + // Count ... func (m *manager) Count(ctx context.Context, query *q.Query) (int64, error) { return m.dao.Count(ctx, query) diff --git a/src/pkg/user/manager.go b/src/pkg/user/manager.go index f6420f4fa..5efb05578 100644 --- a/src/pkg/user/manager.go +++ b/src/pkg/user/manager.go @@ -63,6 +63,8 @@ type Manager interface { // put the id in the pointer of user model, if it does exist, return the user's profile. // This is used for ldap and uaa authentication, such the user can have an ID in Harbor. Onboard(ctx context.Context, user *commonmodels.User) error + // GenerateCheckSum generates truncated crc32 checksum from a given string + GenerateCheckSum(in string) string } // New returns a default implementation of Manager @@ -111,9 +113,9 @@ func (m *manager) DeleteGDPR(ctx context.Context, id int) error { if err != nil { return err } - u.Username = fmt.Sprintf("%s#%d", checkSum(u.Username), u.UserID) - u.Email = fmt.Sprintf("%s#%d", checkSum(u.Email), u.UserID) - u.Realname = fmt.Sprintf("%s#%d", checkSum(u.Realname), u.UserID) + u.Username = fmt.Sprintf("%s#%d", m.GenerateCheckSum(u.Username), u.UserID) + u.Email = fmt.Sprintf("%s#%d", m.GenerateCheckSum(u.Email), u.UserID) + u.Realname = fmt.Sprintf("%s#%d", m.GenerateCheckSum(u.Realname), u.UserID) u.Deleted = true return m.dao.Update(ctx, u, "username", "email", "realname", "deleted") } @@ -231,13 +233,14 @@ func excludeDefaultAdmin(query *q.Query) (qu *q.Query) { return query } +// GenerateCheckSum generates checksum for a given string +func (m *manager) GenerateCheckSum(str string) string { + return fmt.Sprintf("%08x", crc32.Checksum([]byte(str), crc32.IEEETable)) +} + func injectPasswd(u *commonmodels.User, password string) { salt := utils.GenerateRandomString() u.Password = utils.Encrypt(password, salt, utils.SHA256) u.Salt = salt u.PasswordVersion = utils.SHA256 } - -func checkSum(str string) string { - return fmt.Sprintf("%08x", crc32.Checksum([]byte(str), crc32.IEEETable)) -} diff --git a/src/pkg/user/manager_test.go b/src/pkg/user/manager_test.go index 32714be5f..166513149 100644 --- a/src/pkg/user/manager_test.go +++ b/src/pkg/user/manager_test.go @@ -65,9 +65,9 @@ func (m *mgrTestSuite) TestUserDeleteGDPR() { m.dao.On("Update", mock.Anything, testifymock.MatchedBy( func(u *models.User) bool { return u.UserID == 123 && - u.Email == fmt.Sprintf("%s#%d", checkSum("existing@mytest.com"), existingUser.UserID) && - u.Username == fmt.Sprintf("%s#%d", checkSum("existing"), existingUser.UserID) && - u.Realname == fmt.Sprintf("%s#%d", checkSum("RealName"), existingUser.UserID) && + u.Email == fmt.Sprintf("%s#%d", m.mgr.GenerateCheckSum("existing@mytest.com"), existingUser.UserID) && + u.Username == fmt.Sprintf("%s#%d", m.mgr.GenerateCheckSum("existing"), existingUser.UserID) && + u.Realname == fmt.Sprintf("%s#%d", m.mgr.GenerateCheckSum("RealName"), existingUser.UserID) && u.Deleted == true }), "username", diff --git a/src/testing/pkg/audit/dao/dao.go b/src/testing/pkg/audit/dao/dao.go index bbfce5e31..97665fdaa 100644 --- a/src/testing/pkg/audit/dao/dao.go +++ b/src/testing/pkg/audit/dao/dao.go @@ -155,6 +155,20 @@ func (_m *DAO) Purge(ctx context.Context, retentionHour int, includeOperations [ return r0, r1 } +// UpdateUsername provides a mock function with given fields: ctx, username, usernameReplace +func (_m *DAO) UpdateUsername(ctx context.Context, username string, usernameReplace string) error { + ret := _m.Called(ctx, username, usernameReplace) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { + r0 = rf(ctx, username, usernameReplace) + } else { + r0 = ret.Error(0) + } + + return r0 +} + // NewDAO creates a new instance of DAO. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewDAO(t interface { diff --git a/src/testing/pkg/audit/manager.go b/src/testing/pkg/audit/manager.go index 38fbd38a7..1efcd06df 100644 --- a/src/testing/pkg/audit/manager.go +++ b/src/testing/pkg/audit/manager.go @@ -154,6 +154,20 @@ func (_m *Manager) Purge(ctx context.Context, retentionHour int, includeOperatio return r0, r1 } +// UpdateUsername provides a mock function with given fields: ctx, username, replaceWith +func (_m *Manager) UpdateUsername(ctx context.Context, username string, replaceWith string) error { + ret := _m.Called(ctx, username, replaceWith) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { + r0 = rf(ctx, username, replaceWith) + } else { + r0 = ret.Error(0) + } + + return r0 +} + // NewManager creates a new instance of Manager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewManager(t interface { diff --git a/src/testing/pkg/user/manager.go b/src/testing/pkg/user/manager.go index aec51e17a..83b4f9d77 100644 --- a/src/testing/pkg/user/manager.go +++ b/src/testing/pkg/user/manager.go @@ -102,6 +102,20 @@ func (_m *Manager) DeleteGDPR(ctx context.Context, id int) error { return r0 } +// GenerateCheckSum provides a mock function with given fields: in +func (_m *Manager) GenerateCheckSum(in string) string { + ret := _m.Called(in) + + var r0 string + if rf, ok := ret.Get(0).(func(string) string); ok { + r0 = rf(in) + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} + // Get provides a mock function with given fields: ctx, id func (_m *Manager) Get(ctx context.Context, id int) (*commonmodels.User, error) { ret := _m.Called(ctx, id) diff --git a/tests/apitests/python/test_job_service_dashboard.py b/tests/apitests/python/test_job_service_dashboard.py index 6fdc8460e..fd71c4c5a 100644 --- a/tests/apitests/python/test_job_service_dashboard.py +++ b/tests/apitests/python/test_job_service_dashboard.py @@ -33,7 +33,7 @@ class TestJobServiceDashboard(unittest.TestCase, object): self.registry = Registry() self.scan_all = ScanAll() self.schedule = Schedule() - self.job_types = [ "GARBAGE_COLLECTION", "PURGE_AUDIT_LOG", "P2P_PREHEAT", "IMAGE_SCAN", "REPLICATION", "RETENTION", "SCAN_DATA_EXPORT", "SCHEDULER", "SLACK", "SYSTEM_ARTIFACT_CLEANUP", "WEBHOOK", "EXECUTION_SWEEP"] + self.job_types = [ "GARBAGE_COLLECTION", "PURGE_AUDIT_LOG", "P2P_PREHEAT", "IMAGE_SCAN", "REPLICATION", "RETENTION", "SCAN_DATA_EXPORT", "SCHEDULER", "SLACK", "SYSTEM_ARTIFACT_CLEANUP", "WEBHOOK", "EXECUTION_SWEEP", "AUDIT_LOGS_GDPR_COMPLIANT"] self.cron_type = "Custom" self.cron = "0 0 0 * * 0"