Feature: GDPR compliant audit logs (#17396)

This commit is contained in:
Maksym Trofimenko 2023-11-20 20:31:22 +00:00 committed by GitHub
parent 307a3121aa
commit 996e57b511
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 275 additions and 19 deletions

View File

@ -184,6 +184,7 @@ const (
TraceOtelTimeout = "trace_otel_timeout"
GDPRDeleteUser = "gdpr_delete_user"
GDPRAuditLogs = "gdpr_audit_logs"
// These variables are temporary solution for issue: https://github.com/goharbor/harbor/issues/16039
// When user disable the pull count/time/audit log, it will decrease the database access, especially in large concurrency pull scenarios.

View File

@ -21,12 +21,15 @@ import (
commonmodels "github.com/goharbor/harbor/src/common/models"
"github.com/goharbor/harbor/src/common/security"
"github.com/goharbor/harbor/src/common/security/local"
"github.com/goharbor/harbor/src/jobservice/job"
"github.com/goharbor/harbor/src/jobservice/job/impl/gdpr"
"github.com/goharbor/harbor/src/lib"
"github.com/goharbor/harbor/src/lib/config"
"github.com/goharbor/harbor/src/lib/errors"
"github.com/goharbor/harbor/src/lib/q"
"github.com/goharbor/harbor/src/pkg/member"
"github.com/goharbor/harbor/src/pkg/oidc"
"github.com/goharbor/harbor/src/pkg/task"
"github.com/goharbor/harbor/src/pkg/user"
"github.com/goharbor/harbor/src/pkg/user/models"
)
@ -76,6 +79,8 @@ func NewController() Controller {
mgr: user.New(),
oidcMetaMgr: oidc.NewMetaMgr(),
memberMgr: member.Mgr,
taskMgr: task.NewManager(),
exeMgr: task.NewExecutionManager(),
}
}
@ -88,6 +93,8 @@ type controller struct {
mgr user.Manager
oidcMetaMgr oidc.MetaManager
memberMgr member.Manager
taskMgr task.Manager
exeMgr task.ExecutionManager
}
func (c *controller) UpdateOIDCMeta(ctx context.Context, ou *commonmodels.OIDCUser, cols ...string) error {
@ -183,10 +190,36 @@ func (c *controller) Delete(ctx context.Context, id int) error {
if err != nil {
return errors.UnknownError(err).WithMessage("failed to load GDPR setting: %v", err)
}
if gdprSetting.DeleteUser {
return c.mgr.DeleteGDPR(ctx, id)
if gdprSetting.AuditLogs {
userDb, err := c.mgr.Get(ctx, id)
if err != nil {
return errors.Wrap(err, "unable to get user information")
}
params := map[string]interface{}{
gdpr.UserNameParam: userDb.Username,
}
execID, err := c.exeMgr.Create(ctx, job.AuditLogsGDPRCompliantVendorType, -1, task.ExecutionTriggerEvent, params)
if err != nil {
return err
}
_, err = c.taskMgr.Create(ctx, execID, &task.Job{
Name: job.AuditLogsGDPRCompliantVendorType,
Metadata: &job.Metadata{
JobKind: job.KindGeneric,
},
Parameters: params,
})
if err != nil {
return err
}
}
return c.mgr.Delete(ctx, id)
if gdprSetting.DeleteUser {
err = c.mgr.DeleteGDPR(ctx, id)
} else {
err = c.mgr.Delete(ctx, id)
}
return err
}
func (c *controller) List(ctx context.Context, query *q.Query, options ...models.Option) ([]*commonmodels.User, error) {

View File

@ -0,0 +1,85 @@
// Copyright Project Harbor Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gdpr
import (
"fmt"
"github.com/goharbor/harbor/src/jobservice/job"
"github.com/goharbor/harbor/src/lib/errors"
"github.com/goharbor/harbor/src/pkg/audit"
"github.com/goharbor/harbor/src/pkg/user"
)
const UserNameParam = "username"
type AuditLogsDataMasking struct {
manager audit.Manager
userManager user.Manager
}
func (a AuditLogsDataMasking) MaxFails() uint {
return 3
}
func (a AuditLogsDataMasking) MaxCurrency() uint {
return 1
}
func (a AuditLogsDataMasking) ShouldRetry() bool {
return true
}
func (a AuditLogsDataMasking) Validate(params job.Parameters) error {
if params == nil {
// Params are required
return errors.New("missing job parameters")
}
_, err := a.parseParams(params)
return err
}
func (a *AuditLogsDataMasking) init() {
if a.manager == nil {
a.manager = audit.New()
}
if a.userManager == nil {
a.userManager = user.New()
}
}
func (a AuditLogsDataMasking) Run(ctx job.Context, params job.Parameters) error {
logger := ctx.GetLogger()
logger.Info("GDPR audit logs data masking job started")
a.init()
username, err := a.parseParams(params)
if err != nil {
return err
}
logger.Infof("Masking log entries for a user: %s", username)
return a.manager.UpdateUsername(ctx.SystemContext(), username, a.userManager.GenerateCheckSum(username))
}
func (a AuditLogsDataMasking) parseParams(params job.Parameters) (string, error) {
value, exist := params[UserNameParam]
if !exist {
return "", fmt.Errorf("param %s not found", UserNameParam)
}
str, ok := value.(string)
if !ok {
return "", fmt.Errorf("the value of %s isn't string", UserNameParam)
}
return str, nil
}

View File

@ -0,0 +1,67 @@
// Copyright Project Harbor Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gdpr
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/goharbor/harbor/src/jobservice/job"
mockjobservice "github.com/goharbor/harbor/src/testing/jobservice"
"github.com/goharbor/harbor/src/testing/pkg/audit"
"github.com/goharbor/harbor/src/testing/pkg/user"
)
func TestAuditLogsCleanupJobShouldRetry(t *testing.T) {
rep := &AuditLogsDataMasking{}
assert.True(t, rep.ShouldRetry())
}
func TestAuditLogsCleanupJobValidateParams(t *testing.T) {
const validUsername = "user"
var (
manager = &audit.Manager{}
userManager = &user.Manager{}
)
rep := &AuditLogsDataMasking{
manager: manager,
userManager: userManager,
}
err := rep.Validate(nil)
// parameters are required
assert.Error(t, err)
err = rep.Validate(job.Parameters{})
// no required username parameter
assert.Error(t, err)
validParams := job.Parameters{
"username": "user",
}
err = rep.Validate(validParams)
// parameters are valid
assert.Nil(t, err)
ctx := &mockjobservice.MockJobContext{}
logger := &mockjobservice.MockJobLogger{}
ctx.On("GetLogger").Return(logger)
userManager.On("GenerateCheckSum", validUsername).Return("hash")
manager.On("UpdateUsername", context.TODO(), validUsername, "hash").Return(nil)
err = rep.Run(ctx, validParams)
assert.Nil(t, err)
}

View File

@ -44,6 +44,8 @@ const (
ExecSweepVendorType = "EXECUTION_SWEEP"
// ScanAllVendorType: the name of the scan all job
ScanAllVendorType = "SCAN_ALL"
// AuditLogsGDPRCompliantVendorType : the name of the job which makes audit logs table GDPR-compliant
AuditLogsGDPRCompliantVendorType = "AUDIT_LOGS_GDPR_COMPLIANT"
)
var (

View File

@ -24,6 +24,8 @@ import (
"syscall"
"time"
"github.com/goharbor/harbor/src/jobservice/job/impl/gdpr"
"github.com/gomodule/redigo/redis"
"github.com/goharbor/harbor/src/jobservice/api"
@ -327,11 +329,12 @@ func (bs *Bootstrap) loadAndRunRedisWorkerPool(
// In v2.2 we migrate the scheduled replication, garbage collection and scan all to
// the scheduler mechanism, the following three jobs are kept for the legacy jobs
// and they can be removed after several releases
"IMAGE_REPLICATE": (*legacy.ReplicationScheduler)(nil),
"IMAGE_GC": (*legacy.GarbageCollectionScheduler)(nil),
"IMAGE_SCAN_ALL": (*legacy.ScanAllScheduler)(nil),
job.SystemArtifactCleanupVendorType: (*systemartifact.Cleanup)(nil),
job.ExecSweepVendorType: (*task.SweepJob)(nil),
"IMAGE_REPLICATE": (*legacy.ReplicationScheduler)(nil),
"IMAGE_GC": (*legacy.GarbageCollectionScheduler)(nil),
"IMAGE_SCAN_ALL": (*legacy.ScanAllScheduler)(nil),
job.SystemArtifactCleanupVendorType: (*systemartifact.Cleanup)(nil),
job.ExecSweepVendorType: (*task.SweepJob)(nil),
job.AuditLogsGDPRCompliantVendorType: (*gdpr.AuditLogsDataMasking)(nil),
}); err != nil {
// exit
return nil, err

View File

@ -181,6 +181,7 @@ var (
{Name: common.CacheExpireHours, Scope: SystemScope, Group: BasicGroup, EnvKey: "CACHE_EXPIRE_HOURS", DefaultValue: "24", ItemType: &IntType{}, Editable: false, Description: `The expire hours for cache`},
{Name: common.GDPRDeleteUser, Scope: SystemScope, Group: GDPRGroup, EnvKey: "GDPR_DELETE_USER", DefaultValue: "false", ItemType: &BoolType{}, Editable: false, Description: `The flag indicates if a user should be deleted compliant with GDPR.`},
{Name: common.GDPRAuditLogs, Scope: SystemScope, Group: GDPRGroup, EnvKey: "GDPR_AUDIT_LOGS", DefaultValue: "false", ItemType: &BoolType{}, Editable: false, Description: `The flag indicates if an audit logs of a deleted user should be GDPR compliant.`},
{Name: common.AuditLogForwardEndpoint, Scope: UserScope, Group: BasicGroup, EnvKey: "AUDIT_LOG_FORWARD_ENDPOINT", DefaultValue: "", ItemType: &StringType{}, Editable: false, Description: `The endpoint to forward the audit log.`},
{Name: common.SkipAuditLogDatabase, Scope: UserScope, Group: BasicGroup, EnvKey: "SKIP_LOG_AUDIT_DATABASE", DefaultValue: "false", ItemType: &BoolType{}, Editable: false, Description: `The option to skip audit log in database`},

View File

@ -98,4 +98,5 @@ type GroupConf struct {
type GDPRSetting struct {
DeleteUser bool `json:"user_delete,omitempty"`
AuditLogs bool `json:"audit_logs"`
}

View File

@ -186,6 +186,7 @@ func GDPRSetting(ctx context.Context) (*cfgModels.GDPRSetting, error) {
}
return &cfgModels.GDPRSetting{
DeleteUser: DefaultMgr().Get(ctx, common.GDPRDeleteUser).GetBool(),
AuditLogs: DefaultMgr().Get(ctx, common.GDPRAuditLogs).GetBool(),
}, nil
}

View File

@ -42,6 +42,8 @@ type DAO interface {
Delete(ctx context.Context, id int64) (err error)
// Purge the audit log
Purge(ctx context.Context, retentionHour int, includeOperations []string, dryRun bool) (int64, error)
// UpdateUsername replaces username in matched records
UpdateUsername(ctx context.Context, username string, usernameReplace string) error
}
// New returns an instance of the default DAO
@ -57,6 +59,15 @@ var allowedMaps = map[string]interface{}{
type dao struct{}
func (d *dao) UpdateUsername(ctx context.Context, username string, usernameReplace string) error {
o, err := orm.FromContext(ctx)
if err != nil {
return err
}
_, err = o.Raw("UPDATE audit_log SET username = ? WHERE username = ?", usernameReplace, username).Exec()
return err
}
// Purge delete expired audit log
func (*dao) Purge(ctx context.Context, retentionHour int, includeOperations []string, dryRun bool) (int64, error) {
ormer, err := orm.FromContext(ctx)

View File

@ -40,6 +40,8 @@ type Manager interface {
Delete(ctx context.Context, id int64) (err error)
// Purge delete the audit log with retention hours
Purge(ctx context.Context, retentionHour int, includeOperations []string, dryRun bool) (int64, error)
// UpdateUsername Replace all log records username with its hash
UpdateUsername(ctx context.Context, username string, replaceWith string) error
}
// New returns a default implementation of Manager
@ -53,6 +55,10 @@ type manager struct {
dao dao.DAO
}
func (m *manager) UpdateUsername(ctx context.Context, username string, replaceWith string) error {
return m.dao.UpdateUsername(ctx, username, replaceWith)
}
// Count ...
func (m *manager) Count(ctx context.Context, query *q.Query) (int64, error) {
return m.dao.Count(ctx, query)

View File

@ -63,6 +63,8 @@ type Manager interface {
// put the id in the pointer of user model, if it does exist, return the user's profile.
// This is used for ldap and uaa authentication, such the user can have an ID in Harbor.
Onboard(ctx context.Context, user *commonmodels.User) error
// GenerateCheckSum generates truncated crc32 checksum from a given string
GenerateCheckSum(in string) string
}
// New returns a default implementation of Manager
@ -111,9 +113,9 @@ func (m *manager) DeleteGDPR(ctx context.Context, id int) error {
if err != nil {
return err
}
u.Username = fmt.Sprintf("%s#%d", checkSum(u.Username), u.UserID)
u.Email = fmt.Sprintf("%s#%d", checkSum(u.Email), u.UserID)
u.Realname = fmt.Sprintf("%s#%d", checkSum(u.Realname), u.UserID)
u.Username = fmt.Sprintf("%s#%d", m.GenerateCheckSum(u.Username), u.UserID)
u.Email = fmt.Sprintf("%s#%d", m.GenerateCheckSum(u.Email), u.UserID)
u.Realname = fmt.Sprintf("%s#%d", m.GenerateCheckSum(u.Realname), u.UserID)
u.Deleted = true
return m.dao.Update(ctx, u, "username", "email", "realname", "deleted")
}
@ -231,13 +233,14 @@ func excludeDefaultAdmin(query *q.Query) (qu *q.Query) {
return query
}
// GenerateCheckSum generates checksum for a given string
func (m *manager) GenerateCheckSum(str string) string {
return fmt.Sprintf("%08x", crc32.Checksum([]byte(str), crc32.IEEETable))
}
func injectPasswd(u *commonmodels.User, password string) {
salt := utils.GenerateRandomString()
u.Password = utils.Encrypt(password, salt, utils.SHA256)
u.Salt = salt
u.PasswordVersion = utils.SHA256
}
func checkSum(str string) string {
return fmt.Sprintf("%08x", crc32.Checksum([]byte(str), crc32.IEEETable))
}

View File

@ -65,9 +65,9 @@ func (m *mgrTestSuite) TestUserDeleteGDPR() {
m.dao.On("Update", mock.Anything, testifymock.MatchedBy(
func(u *models.User) bool {
return u.UserID == 123 &&
u.Email == fmt.Sprintf("%s#%d", checkSum("existing@mytest.com"), existingUser.UserID) &&
u.Username == fmt.Sprintf("%s#%d", checkSum("existing"), existingUser.UserID) &&
u.Realname == fmt.Sprintf("%s#%d", checkSum("RealName"), existingUser.UserID) &&
u.Email == fmt.Sprintf("%s#%d", m.mgr.GenerateCheckSum("existing@mytest.com"), existingUser.UserID) &&
u.Username == fmt.Sprintf("%s#%d", m.mgr.GenerateCheckSum("existing"), existingUser.UserID) &&
u.Realname == fmt.Sprintf("%s#%d", m.mgr.GenerateCheckSum("RealName"), existingUser.UserID) &&
u.Deleted == true
}),
"username",

View File

@ -155,6 +155,20 @@ func (_m *DAO) Purge(ctx context.Context, retentionHour int, includeOperations [
return r0, r1
}
// UpdateUsername provides a mock function with given fields: ctx, username, usernameReplace
func (_m *DAO) UpdateUsername(ctx context.Context, username string, usernameReplace string) error {
ret := _m.Called(ctx, username, usernameReplace)
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok {
r0 = rf(ctx, username, usernameReplace)
} else {
r0 = ret.Error(0)
}
return r0
}
// NewDAO creates a new instance of DAO. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewDAO(t interface {

View File

@ -154,6 +154,20 @@ func (_m *Manager) Purge(ctx context.Context, retentionHour int, includeOperatio
return r0, r1
}
// UpdateUsername provides a mock function with given fields: ctx, username, replaceWith
func (_m *Manager) UpdateUsername(ctx context.Context, username string, replaceWith string) error {
ret := _m.Called(ctx, username, replaceWith)
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok {
r0 = rf(ctx, username, replaceWith)
} else {
r0 = ret.Error(0)
}
return r0
}
// NewManager creates a new instance of Manager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewManager(t interface {

View File

@ -102,6 +102,20 @@ func (_m *Manager) DeleteGDPR(ctx context.Context, id int) error {
return r0
}
// GenerateCheckSum provides a mock function with given fields: in
func (_m *Manager) GenerateCheckSum(in string) string {
ret := _m.Called(in)
var r0 string
if rf, ok := ret.Get(0).(func(string) string); ok {
r0 = rf(in)
} else {
r0 = ret.Get(0).(string)
}
return r0
}
// Get provides a mock function with given fields: ctx, id
func (_m *Manager) Get(ctx context.Context, id int) (*commonmodels.User, error) {
ret := _m.Called(ctx, id)

View File

@ -33,7 +33,7 @@ class TestJobServiceDashboard(unittest.TestCase, object):
self.registry = Registry()
self.scan_all = ScanAll()
self.schedule = Schedule()
self.job_types = [ "GARBAGE_COLLECTION", "PURGE_AUDIT_LOG", "P2P_PREHEAT", "IMAGE_SCAN", "REPLICATION", "RETENTION", "SCAN_DATA_EXPORT", "SCHEDULER", "SLACK", "SYSTEM_ARTIFACT_CLEANUP", "WEBHOOK", "EXECUTION_SWEEP"]
self.job_types = [ "GARBAGE_COLLECTION", "PURGE_AUDIT_LOG", "P2P_PREHEAT", "IMAGE_SCAN", "REPLICATION", "RETENTION", "SCAN_DATA_EXPORT", "SCHEDULER", "SLACK", "SYSTEM_ARTIFACT_CLEANUP", "WEBHOOK", "EXECUTION_SWEEP", "AUDIT_LOGS_GDPR_COMPLIANT"]
self.cron_type = "Custom"
self.cron = "0 0 0 * * 0"