Upgrade the artifact table

Split the artifact table into the artifact and tag tables, and populate the related data

Signed-off-by: Wenkai Yin <yinw@vmware.com>
This commit is contained in:
Wenkai Yin 2020-02-03 16:45:33 +08:00
parent 88fcacd4b7
commit 9312b788dc
12 changed files with 160 additions and 93 deletions

View File

@ -1,71 +0,0 @@
/*
 * Temporary home of the new artifact model.
 * TODO: drop artifact_2 and use the artifact table instead once the upgrade work is finished.
 */
CREATE TABLE artifact_2 (
    id                  SERIAL       PRIMARY KEY NOT NULL,
    /* artifact type: image, chart, etc. */
    type                VARCHAR(255) NOT NULL,
    media_type          VARCHAR(255) NOT NULL,
    manifest_media_type VARCHAR(255) NOT NULL,
    project_id          INT          NOT NULL,
    repository_id       INT          NOT NULL,
    digest              VARCHAR(255) NOT NULL,
    size                BIGINT,
    push_time           TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    pull_time           TIMESTAMP,
    extra_attrs         TEXT,
    annotations         JSONB,
    /* a digest may appear at most once per repository */
    CONSTRAINT unique_artifact_2 UNIQUE (repository_id, digest)
);
/* tag records the named references that point at an artifact within a repository */
CREATE TABLE tag (
    id            SERIAL       PRIMARY KEY NOT NULL,
    repository_id INT          NOT NULL,
    artifact_id   INT          NOT NULL,
    name          VARCHAR(255) NOT NULL,
    push_time     TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    pull_time     TIMESTAMP,
    /* TODO: point this at the artifact table instead of artifact_2 once the upgrade work is finished */
    FOREIGN KEY (artifact_id) REFERENCES artifact_2 (id),
    /* a tag name may be used only once per repository */
    CONSTRAINT unique_tag UNIQUE (repository_id, name)
);
/* artifact_reference links a parent artifact to each child artifact it references */
CREATE TABLE artifact_reference (
    id        SERIAL PRIMARY KEY NOT NULL,
    parent_id INT    NOT NULL,
    child_id  INT    NOT NULL,
    platform  VARCHAR(255),
    /* TODO: point both keys at the artifact table instead of artifact_2 once the upgrade work is finished */
    FOREIGN KEY (parent_id) REFERENCES artifact_2 (id),
    FOREIGN KEY (child_id)  REFERENCES artifact_2 (id),
    /* a given parent/child pair is stored only once */
    CONSTRAINT unique_reference UNIQUE (parent_id, child_id)
);
/* artifact_trash keeps a record of every deleted artifact */
CREATE TABLE artifact_trash (
    id                  SERIAL       PRIMARY KEY NOT NULL,
    media_type          VARCHAR(255) NOT NULL,
    manifest_media_type VARCHAR(255) NOT NULL,
    repository_name     VARCHAR(255) NOT NULL,
    digest              VARCHAR(255) NOT NULL,
    creation_time       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    /* a digest is recorded at most once per repository name */
    CONSTRAINT unique_artifact_trash UNIQUE (repository_name, digest)
);
/*
 * TODO (upgrade): consider keeping the table "harbor_resource_label" only for
 * helm v2 charts and using this new table for artifact label references.
 */
/* label_reference records which labels have been added to which artifact */
CREATE TABLE label_reference (
    id            SERIAL    PRIMARY KEY NOT NULL,
    label_id      INT       NOT NULL,
    artifact_id   INT       NOT NULL,
    creation_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    update_time   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (label_id) REFERENCES harbor_label (id),
    /* TODO: point this at the artifact table instead of artifact_2 once the upgrade work is finished */
    FOREIGN KEY (artifact_id) REFERENCES artifact_2 (id),
    /* a label can be attached to an artifact only once */
    CONSTRAINT unique_label_reference UNIQUE (label_id, artifact_id)
);

View File

@ -0,0 +1,139 @@
/*
 * Reshape the legacy artifact table in place: add the columns of the new
 * artifact model and retire creation_time. The new columns are added as
 * nullable; they carry no values until a later statement fills them in.
 */
ALTER TABLE artifact
    ADD COLUMN repository_id int,
    ADD COLUMN media_type varchar(255),
    ADD COLUMN manifest_media_type varchar(255),
    ADD COLUMN size bigint,
    ADD COLUMN extra_attrs text,
    ADD COLUMN annotations jsonb,
    DROP COLUMN creation_time;
/* the legacy "kind" column is kept, but under the name "type" */
ALTER TABLE artifact RENAME COLUMN kind TO type;
/*
 * Backfill the new columns of every legacy artifact row: mark it as an IMAGE,
 * resolve repository_id from the repository name, copy the content type of the
 * blob with the same digest into manifest_media_type, and derive media_type
 * from that content type.
 *
 * Only rows that match both a repository (by name) and a blob (by digest) are
 * updated.
 * NOTE(review): unmatched rows keep NULL in repository_id / media_type /
 * manifest_media_type, which the SET NOT NULL statements that follow would
 * reject -- confirm such orphan rows cannot exist.
 */
UPDATE artifact AS art
SET
    type = 'IMAGE',
    repository_id = repo.repository_id,
    manifest_media_type = manifest.content_type,
    media_type = CASE manifest.content_type
        /* v2 manifest */
        WHEN 'application/vnd.docker.distribution.manifest.v2+json'
            THEN 'application/vnd.docker.container.image.v1+json'
        /* manifest list */
        WHEN 'application/vnd.docker.distribution.manifest.list.v2+json'
            THEN 'application/vnd.docker.distribution.manifest.list.v2+json'
        /* any other content type is treated as a v1 manifest */
        ELSE 'application/vnd.docker.distribution.manifest.v1+prettyjws'
    END
FROM repository AS repo
CROSS JOIN blob AS manifest
WHERE art.repo = repo.name
  AND art.digest = manifest.digest;
/* make the newly added columns mandatory; this fails if any row still holds NULL in one of them */
ALTER TABLE artifact
    ALTER COLUMN repository_id SET NOT NULL,
    ALTER COLUMN media_type SET NOT NULL,
    ALTER COLUMN manifest_media_type SET NOT NULL;
/* the legacy "repo" column is kept, but under the name "repository_name" */
ALTER TABLE artifact RENAME COLUMN repo TO repository_name;
/* tag records the named references that point at an artifact within a repository */
CREATE TABLE tag (
    id            SERIAL       PRIMARY KEY NOT NULL,
    repository_id INT          NOT NULL,
    artifact_id   INT          NOT NULL,
    name          VARCHAR(255) NOT NULL,
    push_time     TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    pull_time     TIMESTAMP,
    FOREIGN KEY (artifact_id) REFERENCES artifact (id),
    /* a tag name may be used only once per repository */
    CONSTRAINT unique_tag UNIQUE (repository_id, name)
);
/*
 * Move the tags stored in the artifact table into the tag table.
 * Every artifact row contributes one tag row (its tag name, repository_id and
 * push/pull times). When several artifact rows share the same repository name
 * and digest, all of their tags reference the row with the lowest id.
 */
WITH ordered_art AS (
    /* number the rows of each (repository_name, digest) group by ascending id */
    SELECT
        id,
        repository_name,
        digest,
        row_number() OVER (PARTITION BY repository_name, digest ORDER BY id) AS seq
    FROM artifact
)
INSERT INTO tag (artifact_id, repository_id, name, push_time, pull_time)
SELECT
    first_art.id,
    art.repository_id,
    art.tag,
    art.push_time,
    art.pull_time
FROM artifact AS art
INNER JOIN ordered_art AS first_art
    ON art.repository_name = first_art.repository_name
   AND art.digest = first_art.digest
WHERE first_art.seq = 1;
/*
 * The tag name is no longer stored on the artifact row, and repository_name
 * becomes optional.
 * TODO: remove the DROP NOT NULL once repository_name is populated when an artifact is created.
 */
ALTER TABLE artifact
    DROP COLUMN tag,
    ALTER COLUMN repository_name DROP NOT NULL;
/*
 * Remove the duplicate artifact rows: keep only the rows that at least one
 * tag references, then enforce one row per (repository_id, digest).
 */
DELETE FROM artifact AS art
WHERE NOT EXISTS (
    SELECT 1
    FROM tag
    WHERE tag.artifact_id = art.id
);
ALTER TABLE artifact ADD CONSTRAINT unique_artifact UNIQUE (repository_id, digest);
/*
 * Set the size of each artifact to the sum of the sizes of the blobs that
 * artifact_blob associates with the artifact's digest.
 *
 * The sum is computed per (repository_id, digest), not per digest alone.
 * artifact is unique on (repository_id, digest), so the same digest can appear
 * in several repositories; grouping by digest only would join every blob once
 * per such artifact row and inflate the size by that factor.
 * Artifacts with no matching artifact_blob/blob rows keep a NULL size.
 */
UPDATE artifact
SET size = s.size
FROM (
    SELECT
        art.repository_id,
        art.digest,
        sum(blob.size) AS size
    FROM artifact AS art
    INNER JOIN artifact_blob AS ref
        ON art.digest = ref.digest_af
    INNER JOIN blob AS blob
        ON ref.digest_blob = blob.digest
    GROUP BY art.repository_id, art.digest
) AS s
WHERE artifact.repository_id = s.repository_id
  AND artifact.digest = s.digest;
/* artifact_reference links a parent artifact to each child artifact it references */
CREATE TABLE artifact_reference (
    id        SERIAL PRIMARY KEY NOT NULL,
    parent_id INT    NOT NULL,
    child_id  INT    NOT NULL,
    platform  VARCHAR(255),
    FOREIGN KEY (parent_id) REFERENCES artifact (id),
    FOREIGN KEY (child_id)  REFERENCES artifact (id),
    /* a given parent/child pair is stored only once */
    CONSTRAINT unique_reference UNIQUE (parent_id, child_id)
);
/* artifact_trash keeps a record of every deleted artifact */
CREATE TABLE artifact_trash (
    id                  SERIAL       PRIMARY KEY NOT NULL,
    media_type          VARCHAR(255) NOT NULL,
    manifest_media_type VARCHAR(255) NOT NULL,
    repository_name     VARCHAR(255) NOT NULL,
    digest              VARCHAR(255) NOT NULL,
    creation_time       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    /* a digest is recorded at most once per repository name */
    CONSTRAINT unique_artifact_trash UNIQUE (repository_name, digest)
);
/*
 * TODO (upgrade): consider keeping the table "harbor_resource_label" only for
 * helm v2 charts and using this new table for artifact label references.
 */
/* label_reference records which labels have been added to which artifact */
CREATE TABLE label_reference (
    id            SERIAL    PRIMARY KEY NOT NULL,
    label_id      INT       NOT NULL,
    artifact_id   INT       NOT NULL,
    creation_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    update_time   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (label_id)    REFERENCES harbor_label (id),
    FOREIGN KEY (artifact_id) REFERENCES artifact (id),
    /* a label can be attached to an artifact only once */
    CONSTRAINT unique_label_reference UNIQUE (label_id, artifact_id)
);
/*
 * artifact_2 has the column layout of the old artifact model.
 * TODO: remove this table after cleaning up the code that still relies on the old artifact model.
 */
CREATE TABLE artifact_2 (
    id            SERIAL       PRIMARY KEY NOT NULL,
    project_id    INT          NOT NULL,
    repo          VARCHAR(255) NOT NULL,
    tag           VARCHAR(255) NOT NULL,
    /* digest of the manifest */
    digest        VARCHAR(255) NOT NULL,
    /* kind of artifact: image, chart, etc. */
    kind          VARCHAR(255) NOT NULL,
    creation_time TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    pull_time     TIMESTAMP,
    push_time     TIMESTAMP,
    /* a tag may be used only once per project and repository */
    CONSTRAINT unique_artifact_2 UNIQUE (project_id, repo, tag)
);

View File

@ -65,7 +65,7 @@ func DeleteArtifact(id int64) error {
// DeleteArtifactByDigest ...
func DeleteArtifactByDigest(projectID int64, repo, digest string) error {
_, err := GetOrmer().Raw(`delete from artifact where project_id = ? and repo = ? and digest = ? `,
_, err := GetOrmer().Raw(`delete from artifact_2 where project_id = ? and repo = ? and digest = ? `,
projectID, repo, digest).Exec()
if err != nil {
return err
@ -75,7 +75,7 @@ func DeleteArtifactByDigest(projectID int64, repo, digest string) error {
// DeleteArtifactByTag ...
func DeleteArtifactByTag(projectID int64, repo, tag string) error {
_, err := GetOrmer().Raw(`delete from artifact where project_id = ? and repo = ? and tag = ? `,
_, err := GetOrmer().Raw(`delete from artifact_2 where project_id = ? and repo = ? and tag = ? `,
projectID, repo, tag).Exec()
if err != nil {
return err

View File

@ -190,7 +190,7 @@ FROM
SELECT
digest
FROM
artifact
artifact_2
WHERE
(
project_id = ?

View File

@ -127,7 +127,7 @@ SELECT
bb.content_type,
bb.size,
bb.creation_time
FROM artifact af
FROM artifact_2 af
JOIN artifact_blob afnb
ON af.digest = afnb.digest_af
JOIN BLOB bb
@ -158,7 +158,7 @@ SELECT
bb.content_type,
bb.size,
bb.creation_time
FROM artifact af
FROM artifact_2 af
JOIN artifact_blob afnb
ON af.digest = afnb.digest_af
JOIN BLOB bb

View File

@ -19,7 +19,7 @@ type Artifact struct {
// TableName ...
func (af *Artifact) TableName() string {
return "artifact"
return "artifact_2"
}
// ArtifactQuery ...

View File

@ -16,6 +16,7 @@ package dao
import (
"context"
beegoorm "github.com/astaxie/beego/orm"
ierror "github.com/goharbor/harbor/src/internal/error"
"github.com/goharbor/harbor/src/internal/orm"
@ -50,22 +51,21 @@ type DAO interface {
}
const (
// TODO replace the table name "artifact_2" after upgrade
// both tagged and untagged artifacts
all = `IN (
SELECT DISTINCT art.id FROM artifact_2 art
SELECT DISTINCT art.id FROM artifact art
LEFT JOIN tag ON art.id=tag.artifact_id
LEFT JOIN artifact_reference ref ON art.id=ref.child_id
WHERE tag.id IS NOT NULL OR ref.id IS NULL)`
// only untagged artifacts
untagged = `IN (
SELECT DISTINCT art.id FROM artifact_2 art
SELECT DISTINCT art.id FROM artifact art
LEFT JOIN tag ON art.id=tag.artifact_id
LEFT JOIN artifact_reference ref ON art.id=ref.child_id
WHERE tag.id IS NULL AND ref.id IS NULL)`
// only tagged artifacts
tagged = `IN (
SELECT DISTINCT art.id FROM artifact_2 art
SELECT DISTINCT art.id FROM artifact art
LEFT JOIN tag ON art.id=tag.artifact_id
WHERE tag.id IS NOT NULL)`
)

View File

@ -43,8 +43,7 @@ type Artifact struct {
// TableName for artifact
func (a *Artifact) TableName() string {
// TODO use "artifact" after finishing the upgrade/migration work
return "artifact_2"
return "artifact"
}
// ArtifactReference records the child artifact referenced by parent artifact

View File

@ -2,10 +2,11 @@ package dao
import (
"context"
"time"
ierror "github.com/goharbor/harbor/src/internal/error"
"github.com/goharbor/harbor/src/internal/orm"
"github.com/goharbor/harbor/src/pkg/artifactrash/model"
"time"
)
// DAO is the data access object interface for artifact trash
@ -61,7 +62,6 @@ func (d *dao) Delete(ctx context.Context, id int64) (err error) {
}
// Filter ...
// ToDo replace artifact_2 with artifact
func (d *dao) Filter(ctx context.Context) (arts []model.ArtifactTrash, err error) {
var deletedAfs []model.ArtifactTrash
ormer, err := orm.FromContext(ctx)
@ -69,7 +69,7 @@ func (d *dao) Filter(ctx context.Context) (arts []model.ArtifactTrash, err error
return deletedAfs, err
}
sql := `SELECT * FROM artifact_trash where artifact_trash.digest NOT IN (select digest from artifact_2)`
sql := `SELECT * FROM artifact_trash where artifact_trash.digest NOT IN (select digest from artifact)`
if err := ormer.Raw(sql).QueryRow(&deletedAfs); err != nil {
return deletedAfs, err

View File

@ -190,7 +190,7 @@ func (d *dao) FindBlobsShouldUnassociatedWithProject(ctx context.Context, projec
return nil, err
}
sql := `SELECT b.digest_blob FROM artifact_2 a, artifact_blob b WHERE a.digest = b.digest_af AND a.project_id = ? AND b.digest_blob IN (%s)`
sql := `SELECT b.digest_blob FROM artifact a, artifact_blob b WHERE a.digest = b.digest_af AND a.project_id = ? AND b.digest_blob IN (%s)`
params := []interface{}{projectID}
for _, blob := range blobs {
params = append(params, blob.Digest)

View File

@ -173,11 +173,11 @@ func (suite *DaoTestSuite) TestFindBlobsShouldUnassociatedWithProject() {
artifact1 := suite.DigestString()
artifact2 := suite.DigestString()
sql := `INSERT INTO artifact_2 ("type", media_type, manifest_media_type, digest, project_id, repository_id) VALUES ('image', 'media_type', 'manifest_media_type', ?, ?, ?)`
sql := `INSERT INTO artifact ("type", media_type, manifest_media_type, digest, project_id, repository_id) VALUES ('image', 'media_type', 'manifest_media_type', ?, ?, ?)`
suite.ExecSQL(sql, artifact1, projectID, 10)
suite.ExecSQL(sql, artifact2, projectID, 10)
defer suite.ExecSQL(`DELETE FROM artifact_2 WHERE project_id = ?`, projectID)
defer suite.ExecSQL(`DELETE FROM artifact WHERE project_id = ?`, projectID)
digest1 := suite.DigestString()
digest2 := suite.DigestString()
@ -211,7 +211,7 @@ func (suite *DaoTestSuite) TestFindBlobsShouldUnassociatedWithProject() {
suite.Len(results, 0)
}
suite.ExecSQL(`DELETE FROM artifact_2 WHERE digest = ?`, artifact2)
suite.ExecSQL(`DELETE FROM artifact WHERE digest = ?`, artifact2)
{
results, err := suite.dao.FindBlobsShouldUnassociatedWithProject(ctx, projectID, blobs)

View File

@ -92,11 +92,11 @@ func (suite *ManagerTestSuite) TestCleanupAssociationsForProject() {
artifact1 := suite.DigestString()
artifact2 := suite.DigestString()
sql := `INSERT INTO artifact_2 ("type", media_type, manifest_media_type, digest, project_id, repository_id) VALUES ('image', 'media_type', 'manifest_media_type', ?, ?, ?)`
sql := `INSERT INTO artifact ("type", media_type, manifest_media_type, digest, project_id, repository_id) VALUES ('image', 'media_type', 'manifest_media_type', ?, ?, ?)`
suite.ExecSQL(sql, artifact1, projectID, 10)
suite.ExecSQL(sql, artifact2, projectID, 10)
defer suite.ExecSQL(`DELETE FROM artifact_2 WHERE project_id = ?`, projectID)
defer suite.ExecSQL(`DELETE FROM artifact WHERE project_id = ?`, projectID)
digest1 := suite.DigestString()
digest2 := suite.DigestString()
@ -135,7 +135,7 @@ func (suite *ManagerTestSuite) TestCleanupAssociationsForProject() {
}
}
suite.ExecSQL(`DELETE FROM artifact_2 WHERE digest = ?`, artifact2)
suite.ExecSQL(`DELETE FROM artifact WHERE digest = ?`, artifact2)
{
suite.Nil(Mgr.CleanupAssociationsForProject(ctx, projectID, blobs))