feat: enhanced default processor

Signed-off-by: Yiyang Huang <huangyiyang.huangyy@bytedance.com>
This commit is contained in:
Yiyang Huang 2020-07-20 09:56:56 +08:00 committed by Yiyang Huang
parent b98b8b9159
commit b98dc97fbd
14 changed files with 845 additions and 29 deletions

View File

@ -113,5 +113,6 @@ END $$;
ALTER TABLE schedule DROP COLUMN IF EXISTS job_id;
ALTER TABLE schedule DROP COLUMN IF EXISTS status;
/*replication quay.io update vendor type*/
UPDATE registry SET type = 'quay' WHERE type = 'quay-io';
UPDATE registry SET type = 'quay' WHERE type = 'quay-io';
/*add the icon column (icon layer digest) to artifact; IF NOT EXISTS keeps the migration safe to re-run*/
ALTER TABLE artifact ADD COLUMN IF NOT EXISTS icon varchar(255);

View File

@ -15,16 +15,20 @@
package artifact
import (
"testing"
"github.com/goharbor/harbor/src/controller/artifact/processor"
"github.com/goharbor/harbor/src/pkg/artifact"
"github.com/goharbor/harbor/src/testing/mock"
tart "github.com/goharbor/harbor/src/testing/pkg/artifact"
tpro "github.com/goharbor/harbor/src/testing/pkg/processor"
"github.com/goharbor/harbor/src/testing/pkg/registry"
"github.com/docker/distribution"
"github.com/docker/distribution/manifest/schema1"
"github.com/docker/distribution/manifest/schema2"
"github.com/goharbor/harbor/src/controller/artifact/processor"
"github.com/goharbor/harbor/src/pkg/artifact"
tart "github.com/goharbor/harbor/src/testing/pkg/artifact"
"github.com/goharbor/harbor/src/testing/pkg/registry"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/suite"
"testing"
)
var (
@ -203,6 +207,7 @@ type abstractorTestSuite struct {
argMgr *tart.FakeManager
regCli *registry.FakeClient
abstractor *abstractor
processor *tpro.Processor
}
func (a *abstractorTestSuite) SetupTest() {
@ -212,8 +217,10 @@ func (a *abstractorTestSuite) SetupTest() {
artMgr: a.argMgr,
regCli: a.regCli,
}
a.processor = &tpro.Processor{}
// clear all registered processors
processor.Registry = map[string]processor.Processor{}
processor.Registry[schema2.MediaTypeImageConfig] = a.processor
}
// docker manifest v1
@ -240,6 +247,7 @@ func (a *abstractorTestSuite) TestAbstractMetadataOfV2Manifest() {
artifact := &artifact.Artifact{
ID: 1,
}
a.processor.On("AbstractMetadata", mock.Anything, mock.Anything, mock.Anything).Return(nil)
err = a.abstractor.AbstractMetadata(nil, artifact)
a.Require().Nil(err)
a.Assert().Equal(int64(1), artifact.ID)

View File

@ -0,0 +1,77 @@
package annotation
import (
"context"
"github.com/goharbor/harbor/src/lib/log"
"github.com/goharbor/harbor/src/pkg/artifact"
reg "github.com/goharbor/harbor/src/pkg/registry"
)
// Content types accepted for an artifact icon.
const (
	// GIF is icon content type image/gif
	GIF = "image/gif"
	// PNG is icon content type image/png
	PNG = "image/png"
	// JPEG is icon content type image/jpeg
	JPEG = "image/jpeg"
)

// Building blocks of an annotation key, joined as <prefix>.<version>.<key word>.
const (
	// AnnotationPrefix is the prefix of annotation
	AnnotationPrefix = "io.goharbor.artifact"
	// SkipList is the key word of skip-list annotation
	SkipList = "skip-list"
	// Icon is the key word of icon annotation
	Icon = "icon"
)
var (
	// registry maps an annotation version to the parser registered for it.
	registry = make(map[string]Parser)
	// sortedAnnotationVersionList holds the registered annotation versions in
	// the order their parsers are applied. Versions are appended as they are
	// registered, so lower versions must be registered first for their parsers
	// to run first.
	sortedAnnotationVersionList = []string{}
)
func init() {
v1alpha1Parser := &v1alpha1Parser{
regCli: reg.Cli,
}
RegisterAnnotationParser(v1alpha1Parser, V1alpha1)
}
// NewParser creates a new annotation parser that delegates to every
// registered version-specific parser.
func NewParser() Parser {
	return new(parser)
}
// Parser parses annotations in artifact manifest.
// Implementations receive the raw manifest bytes and may update the given
// artifact in place.
type Parser interface {
// Parse parses annotations in artifact manifest, abstracts data from artifact config layer into the artifact model.
// A non-nil error means the annotations could not be processed.
Parse(ctx context.Context, artifact *artifact.Artifact, manifest []byte) (err error)
}
// parser is the Parser returned by NewParser; it holds no state and runs the
// parsers found through the package-level registry.
type parser struct{}
// Parse runs the annotation parser of every version listed in
// sortedAnnotationVersionList, in list order, against the manifest and stops
// at the first error.
//
// A listed version that has no parser in the registry is skipped: calling
// Parse on the nil interface returned by GetAnnotationParser would panic.
func (p *parser) Parse(ctx context.Context, artifact *artifact.Artifact, manifest []byte) (err error) {
	for _, version := range sortedAnnotationVersionList {
		versioned := GetAnnotationParser(version)
		if versioned == nil {
			continue
		}
		if err = versioned.Parse(ctx, artifact, manifest); err != nil {
			return err
		}
	}
	return nil
}
// RegisterAnnotationParser registers parser as the annotation parser for the
// given version.
//
// Registering a version again replaces its parser but keeps its original
// position in the processing order, so the version is never listed twice and
// its parser never runs twice for one manifest.
func RegisterAnnotationParser(parser Parser, version string) {
	registry[version] = parser

	listed := false
	for _, known := range sortedAnnotationVersionList {
		if known == version {
			listed = true
			break
		}
	}
	if !listed {
		sortedAnnotationVersionList = append(sortedAnnotationVersionList, version)
	}
	log.Infof("the annotation parser to parser artifact annotation version %s registered", version)
}
// GetAnnotationParser returns the annotation parser registered for version,
// or nil when no parser is registered for it.
func GetAnnotationParser(version string) Parser {
	if found, ok := registry[version]; ok {
		return found
	}
	return nil
}

View File

@ -0,0 +1,41 @@
package annotation
import (
"testing"
fp "github.com/goharbor/harbor/src/testing/pkg/parser"
"github.com/stretchr/testify/suite"
)
// parserTestSuite exercises registration and lookup of annotation parsers.
type parserTestSuite struct {
suite.Suite
}
// SetupTest clears all registration state before each test. Both the registry
// map and the ordered version list are reset so that versions registered by
// init or by an earlier test do not leak into the next one.
func (p *parserTestSuite) SetupTest() {
	registry = map[string]Parser{}
	sortedAnnotationVersionList = make([]string, 0)
}
// TestRegisterAnnotationParser checks that a registered parser is stored in
// the registry under its version.
func (p *parserTestSuite) TestRegisterAnnotationParser() {
	const version = "v1alpha1"
	fake := &fp.Parser{}

	RegisterAnnotationParser(fake, version)

	// the registry must hold exactly the parser registered above
	expected := map[string]Parser{version: fake}
	p.Equal(expected, registry)
}
// TestGetAnnotationParser checks that a registered parser can be looked up by
// its version and comes back with its concrete type.
func (p *parserTestSuite) TestGetAnnotationParser() {
	const version = "v1alpha1"
	RegisterAnnotationParser(&fp.Parser{}, version)

	got := GetAnnotationParser(version)
	p.Require().NotNil(got)

	_, isFake := got.(*fp.Parser)
	p.True(isFake)
}
// TestProcessorTestSuite is the go test entry point that runs parserTestSuite.
func TestProcessorTestSuite(t *testing.T) {
	suite.Run(t, new(parserTestSuite))
}

View File

@ -0,0 +1,99 @@
package annotation
import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"strings"
"github.com/goharbor/harbor/src/lib/errors"
"github.com/goharbor/harbor/src/pkg/artifact"
reg "github.com/goharbor/harbor/src/pkg/registry"
"github.com/docker/distribution/manifest/schema2"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
)
// V1alpha1 is the version of annotation parser
const V1alpha1 = "v1alpha1"
// v1alpha1Parser parses the v1alpha1 annotations (skip-list and icon) of an
// artifact manifest.
type v1alpha1Parser struct {
// regCli is the registry client used to pull the icon layer
regCli reg.Client
}
// Parse applies the v1alpha1 annotations found in manifest to artifact.
// Manifests that are neither OCI image manifests nor Docker schema2 manifests
// are ignored. The skip-list annotation is handled first, then the icon
// annotation; an error from decoding the manifest or from icon handling is
// returned.
func (p *v1alpha1Parser) Parse(ctx context.Context, artifact *artifact.Artifact, manifest []byte) error {
	switch artifact.ManifestMediaType {
	case v1.MediaTypeImageManifest, schema2.MediaTypeManifest:
		// supported manifest type, continue below
	default:
		return nil
	}

	// decode the manifest
	var parsed v1.Manifest
	if err := json.Unmarshal(manifest, &parsed); err != nil {
		return err
	}

	// parse skip-list annotation io.goharbor.artifact.v1alpha1.skip-list
	parseV1alpha1SkipList(artifact, &parsed)

	// parse icon annotation io.goharbor.artifact.v1alpha1.icon
	return parseV1alpha1Icon(artifact, &parsed, p.regCli)
}
// parseV1alpha1SkipList removes from artifact.ExtraAttrs every key named in
// the comma-separated skip-list annotation of the manifest config. It does
// nothing when the config carries no skip-list annotation.
func parseV1alpha1SkipList(artifact *artifact.Artifact, manifest *v1.Manifest) {
	annotationKey := fmt.Sprintf("%s.%s.%s", AnnotationPrefix, V1alpha1, SkipList)
	raw, found := manifest.Config.Annotations[annotationKey]
	if !found {
		return
	}

	attrs := artifact.ExtraAttrs
	for _, name := range strings.Split(raw, ",") {
		delete(attrs, name)
	}
	artifact.ExtraAttrs = attrs
}
// parseV1alpha1Icon looks for the first layer annotated with
// io.goharbor.artifact.v1alpha1.icon, pulls that layer through reg, validates
// it and stores its digest in artifact.Icon.
//
// It returns nil and leaves the artifact untouched when no layer carries the
// icon annotation. A BadRequest error is returned when the icon is larger
// than 1MB or when its sniffed content type is not GIF, PNG or JPEG. Errors
// from pulling or reading the blob are returned as-is.
func parseV1alpha1Icon(artifact *artifact.Artifact, manifest *v1.Manifest, reg reg.Client) error {
	// maxIconSize is the largest accepted icon, in bytes (1MB).
	const maxIconSize = 1 << 20

	iconAnnotationKey := fmt.Sprintf("%s.%s.%s", AnnotationPrefix, V1alpha1, Icon)
	var iconDigest string
	for _, layer := range manifest.Layers {
		if _, ok := layer.Annotations[iconAnnotationKey]; ok {
			iconDigest = layer.Digest.String()
			break
		}
	}
	if iconDigest == "" {
		return nil
	}

	// pull icon layer
	_, icon, err := reg.PullBlob(artifact.RepositoryName, iconDigest)
	if err != nil {
		return err
	}
	// the pulled blob must be closed on every return path
	defer icon.Close()

	// check the size of the icon <= 1MB.
	// ReadAll reports EOF as success, so read one byte past the limit and
	// compare the length: only then can an oversized icon be told apart from
	// one that is exactly at the limit.
	data, err := ioutil.ReadAll(io.LimitReader(icon, maxIconSize+1))
	if err != nil {
		return err
	}
	if len(data) > maxIconSize {
		return errors.New(nil).WithCode(errors.BadRequestCode).WithMessage("the maximum size of the icon is 1MB")
	}

	// check the content type
	contentType := http.DetectContentType(data)
	switch contentType {
	case GIF, PNG, JPEG:
	default:
		return errors.New(nil).WithCode(errors.BadRequestCode).WithMessage("unsupported content type: %s", contentType)
	}

	artifact.Icon = iconDigest
	return nil
}

File diff suppressed because one or more lines are too long

View File

@ -45,7 +45,7 @@ type processor struct {
manifestProcessor *base.ManifestProcessor
}
func (p *processor) AbstractMetadata(ctx context.Context, art *artifact.Artifact, manifest []byte, ) error {
func (p *processor) AbstractMetadata(ctx context.Context, art *artifact.Artifact, manifest []byte) error {
cfgManiDgt := ""
// try to get the digest of the manifest that the config layer is referenced by
for _, reference := range art.References {

View File

@ -16,28 +16,44 @@ package processor
import (
"context"
"github.com/goharbor/harbor/src/lib/errors"
"github.com/goharbor/harbor/src/pkg/artifact"
"encoding/json"
"regexp"
"strings"
// annotation parsers will be registered
"github.com/goharbor/harbor/src/controller/artifact/annotation"
"github.com/goharbor/harbor/src/lib/errors"
"github.com/goharbor/harbor/src/lib/log"
"github.com/goharbor/harbor/src/pkg/artifact"
"github.com/goharbor/harbor/src/pkg/registry"
"github.com/docker/distribution/manifest/schema2"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
)
// ArtifactTypeUnknown defines the type for the unknown artifacts
const ArtifactTypeUnknown = "UNKNOWN"
// ArtifactTypeUnknown defines the type for the unknown artifacts
const ArtifactTypeUnknown = "UNKNOWN"

// DefaultIconDigest defines default icon layer digest
const DefaultIconDigest = "sha256:da834479c923584f4cbcdecc0dac61f32bef1d51e8aae598cf16bd154efab49f"
// DefaultProcessor is to process artifact which has no specific processor
var DefaultProcessor = &defaultProcessor{regCli: registry.Cli}

// artifactTypeRegExp matches config media types shaped like
// application/vnd.<vendor>.<type>.config.<version>+json and captures <type>.
var artifactTypeRegExp = regexp.MustCompile(`^application/vnd\.[^.]*\.(.*)\.config\.[^.]*\+json$`)
// the default processor to process artifact that has no specific processor.
// It derives the artifact type from the media type and abstracts the metadata
// from the config layer, which it pulls through regCli.
type defaultProcessor struct {
mediaType string
regCli registry.Client
}
func (d *defaultProcessor) GetArtifactType(ctx context.Context, artifact *artifact.Artifact) string {
// try to parse the type from the media type
strs := artifactTypeRegExp.FindStringSubmatch(d.mediaType)
strs := artifactTypeRegExp.FindStringSubmatch(artifact.MediaType)
if len(strs) == 2 {
return strings.ToUpper(strs[1])
}
@ -47,12 +63,77 @@ func (d *defaultProcessor) GetArtifactType(ctx context.Context, artifact *artifa
// ListAdditionTypes always returns nil: the default processor lists no
// addition types.
func (d *defaultProcessor) ListAdditionTypes(ctx context.Context, artifact *artifact.Artifact) []string {
return nil
}
// AbstractMetadata abstracts the metadata of a user-defined artifact.
//
// For OCI image manifests and Docker schema2 manifests it pulls the config
// layer, decodes it as JSON into artifact.ExtraAttrs, and then lets the
// annotation parsers post-process the artifact. Annotation keys in the
// manifest decide which content is processed. Other manifest media types are
// left untouched.
//
// A failure of the annotation parsers is logged and not returned. When no
// icon has been set afterwards, artifact.Icon falls back to DefaultIconDigest.
//
// Here is a manifest example:
// {
//   "schemaVersion": 2,
//   "config": {
//     "mediaType": "application/vnd.caicloud.model.config.v1alpha1+json",
//     "digest": "sha256:be948daf0e22f264ea70b713ea0db35050ae659c185706aa2fad74834455fe8c",
//     "size": 187,
//     "annotations": {
//       "io.goharbor.artifact.v1alpha1.skip-list": "metrics,git"
//     }
//   },
//   "layers": [
//     {
//       "mediaType": "image/png",
//       "digest": "sha256:d923b93eadde0af5c639a972710a4d919066aba5d0dfbf4b9385099f70272da0",
//       "size": 166015,
//       "annotations": {
//         "io.goharbor.artifact.v1alpha1.icon": ""
//       }
//     },
//     {
//       "mediaType": "application/tar+gzip",
//       "digest": "sha256:d923b93eadde0af5c639a972710a4d919066aba5d0dfbf4b9385099f70272da0",
//       "size": 166015
//     }
//   ]
// }
func (d *defaultProcessor) AbstractMetadata(ctx context.Context, artifact *artifact.Artifact, manifest []byte) error {
	isOCI := artifact.ManifestMediaType == v1.MediaTypeImageManifest
	isDockerV2 := artifact.ManifestMediaType == schema2.MediaTypeManifest
	if !(isOCI || isDockerV2) {
		return nil
	}

	// decode the manifest
	var parsed v1.Manifest
	if err := json.Unmarshal(manifest, &parsed); err != nil {
		return err
	}

	// pull the config layer
	_, cfgBlob, err := d.regCli.PullBlob(artifact.RepositoryName, parsed.Config.Digest.String())
	if err != nil {
		return err
	}
	defer cfgBlob.Close()

	// Some artifacts have an empty config layer; only decode a non-empty one.
	attrs := map[string]interface{}{}
	if parsed.Config.Size != 0 {
		if err := json.NewDecoder(cfgBlob).Decode(&attrs); err != nil {
			return err
		}
	}
	// Populate all metadata into the ExtraAttrs first; the annotation parsers
	// work on the populated artifact afterwards.
	artifact.ExtraAttrs = attrs

	if err := annotation.NewParser().Parse(ctx, artifact, manifest); err != nil {
		log.Errorf("the annotation parser parse annotation for artifact error: %v", err)
	}

	if artifact.Icon == "" {
		artifact.Icon = DefaultIconDigest
	}
	return nil
}
func (d *defaultProcessor) AbstractAddition(ctx context.Context, artifact *artifact.Artifact, addition string) (*Addition, error) {
// Addition not support for user-defined artifact yet.
// It will be support in the future.
// return error directly
return nil, errors.New(nil).WithCode(errors.BadRequestCode).
WithMessage("the processor for artifact %s not found, cannot get the addition", artifact.Type)

View File

@ -15,39 +15,175 @@
package processor
import (
"github.com/stretchr/testify/suite"
"context"
"github.com/goharbor/harbor/src/pkg/distribution"
"github.com/goharbor/harbor/src/testing/mock"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
"io/ioutil"
"strings"
"testing"
"github.com/goharbor/harbor/src/pkg/artifact"
"github.com/goharbor/harbor/src/testing/pkg/parser"
"github.com/goharbor/harbor/src/testing/pkg/registry"
"github.com/stretchr/testify/suite"
)
var (
// ormbConfig is the JSON config layer content that the fake registry client
// serves in TestAbstractMetadata.
ormbConfig = `{
"created": "2015-10-31T22:22:56.015925234Z",
"author": "Ce Gao <gaoce@caicloud.io>",
"description": "CNN Model",
"tags": [
"cv"
],
"labels": {
"tensorflow.version": "2.0.0"
},
"framework": "TensorFlow",
"format": "SavedModel",
"size": 9223372036854775807,
"metrics": [
{
"name": "acc",
"value": "0.9"
}
],
"hyperparameters": [
{
"name": "batch_size",
"value": "32"
}
],
"signature": {
"inputs": [
{
"name": "input_1",
"size": [
224,
224,
3
],
"dtype": "float64"
}
],
"outputs": [
{
"name": "output_1",
"size": [
1,
1000
],
"dtype": "float64"
}
],
"layers": [
{
"name": "conv"
}
]
},
"training": {
"git": {
"repository": "git@github.com:caicloud/ormb.git",
"revision": "22f1d8406d464b0c0874075539c1f2e96c253775"
}
},
"dataset": {
"git": {
"repository": "git@github.com:caicloud/ormb.git",
"revision": "22f1d8406d464b0c0874075539c1f2e96c253775"
}
}
}`
// ormbManifestWithoutIcon is an OCI image manifest whose config carries a
// skip-list annotation and whose only layer has no icon annotation.
ormbManifestWithoutIcon = `{
"schemaVersion":2,
"mediaType": "application/vnd.oci.image.manifest.v1+json",
"config":{
"mediaType":"application/vnd.caicloud.model.config.v1alpha1+json",
"digest":"sha256:be948daf0e22f264ea70b713ea0db35050ae659c185706aa2fad74834455fe8c",
"size":187,
"annotations": {
"io.goharbor.artifact.v1alpha1.skip-list": "metrics,git"
}
},
"layers":[
{
"mediaType":"application/tar+gzip",
"digest":"sha256:eb6063fecbb50a9d98268cb61746a0fd62a27a4af9e850ffa543a1a62d3948b2",
"size":166022
}
]
}`
)
// defaultProcessorTestSuite tests the default processor against a fake
// registry client.
type defaultProcessorTestSuite struct {
suite.Suite
// processor is the default processor under test
processor *defaultProcessor
// parser is a mock annotation parser created in SetupTest
parser *parser.Parser
// regCli is the fake registry client handed to the processor
regCli *registry.FakeClient
}
// SetupTest builds a fresh fake registry client, a default processor wired
// to that client, and a fresh mock parser before each test.
func (d *defaultProcessorTestSuite) SetupTest() {
	fakeCli := &registry.FakeClient{}
	d.regCli = fakeCli
	d.processor = &defaultProcessor{regCli: fakeCli}
	d.parser = &parser.Parser{}
}
func (d *defaultProcessorTestSuite) TestGetArtifactType() {
mediaType := ""
processor := &defaultProcessor{mediaType: mediaType}
typee := processor.GetArtifactType(nil, nil)
art := &artifact.Artifact{MediaType: mediaType}
processor := &defaultProcessor{}
typee := processor.GetArtifactType(nil, art)
d.Equal(ArtifactTypeUnknown, typee)
mediaType = "unknown"
processor = &defaultProcessor{mediaType: mediaType}
typee = processor.GetArtifactType(nil, nil)
art = &artifact.Artifact{MediaType: mediaType}
processor = &defaultProcessor{}
typee = processor.GetArtifactType(nil, art)
d.Equal(ArtifactTypeUnknown, typee)
mediaType = "application/vnd.oci.image.config.v1+json"
processor = &defaultProcessor{mediaType: mediaType}
typee = processor.GetArtifactType(nil, nil)
art = &artifact.Artifact{MediaType: mediaType}
processor = &defaultProcessor{}
typee = processor.GetArtifactType(nil, art)
d.Equal("IMAGE", typee)
mediaType = "application/vnd.cncf.helm.chart.config.v1+json"
processor = &defaultProcessor{mediaType: mediaType}
typee = processor.GetArtifactType(nil, nil)
art = &artifact.Artifact{MediaType: mediaType}
processor = &defaultProcessor{}
typee = processor.GetArtifactType(nil, art)
d.Equal("HELM.CHART", typee)
mediaType = "application/vnd.sylabs.sif.config.v1+json"
processor = &defaultProcessor{mediaType: mediaType}
typee = processor.GetArtifactType(nil, nil)
art = &artifact.Artifact{MediaType: mediaType}
processor = &defaultProcessor{}
typee = processor.GetArtifactType(nil, art)
d.Equal("SIF", typee)
mediaType = "application/vnd.caicloud.model.config.v1alpha1+json"
art = &artifact.Artifact{MediaType: mediaType}
processor = &defaultProcessor{}
typee = processor.GetArtifactType(nil, art)
d.Equal("MODEL", typee)
}
// TestAbstractMetadata checks that a manifest without an icon layer ends up
// with the default icon digest after its metadata has been abstracted.
func (d *defaultProcessorTestSuite) TestAbstractMetadata() {
	parsed, _, err := distribution.UnmarshalManifest(v1.MediaTypeImageManifest, []byte(ormbManifestWithoutIcon))
	d.Require().Nil(err)
	mediaType, payload, err := parsed.Payload()
	d.Require().Nil(err)

	// the fake registry serves the ORMB config as the config layer
	cfg := ioutil.NopCloser(strings.NewReader(ormbConfig))
	d.regCli.On("PullBlob").Return(0, cfg, nil)
	d.parser.On("Parse", context.TODO(), mock.AnythingOfType("*artifact.Artifact"), mock.AnythingOfType("[]byte")).Return(nil)

	art := &artifact.Artifact{ManifestMediaType: mediaType}
	err = d.processor.AbstractMetadata(nil, art, payload)
	d.Require().Nil(err)
	d.Equal(DefaultIconDigest, art.Icon)
}
func TestDefaultProcessorTestSuite(t *testing.T) {

View File

@ -17,6 +17,7 @@ package processor
import (
"context"
"fmt"
"github.com/goharbor/harbor/src/lib/log"
"github.com/goharbor/harbor/src/pkg/artifact"
)
@ -66,7 +67,7 @@ func Get(mediaType string) Processor {
// no registered processor found, use the default one
if processor == nil {
log.Debugf("the processor for media type %s not found, use the default one", mediaType)
processor = &defaultProcessor{mediaType: mediaType}
processor = DefaultProcessor
}
return processor
}

View File

@ -40,6 +40,7 @@ type Artifact struct {
PullTime time.Time `orm:"column(pull_time)"`
ExtraAttrs string `orm:"column(extra_attrs)"` // json string
Annotations string `orm:"column(annotations);type(jsonb)"` // json string
Icon string `orm:"column(icon)"` // icon layer digest
}
// TableName for artifact

View File

@ -43,6 +43,7 @@ type Artifact struct {
ExtraAttrs map[string]interface{} `json:"extra_attrs"` // only contains the simple attributes specific for the different artifact type, most of them should come from the config layer
Annotations map[string]string `json:"annotations"`
References []*Reference `json:"references"` // child artifacts referenced by the parent artifact if the artifact is an index
Icon string `json:"icon"` // icon layer digest
}
// IsImageIndex returns true when artifact is image index

View File

@ -0,0 +1,30 @@
// Code generated by mockery v2.1.0. DO NOT EDIT.
package parser
import (
context "context"
artifact "github.com/goharbor/harbor/src/pkg/artifact"
mock "github.com/stretchr/testify/mock"
)
// Parser is an autogenerated mock type for the Parser type
type Parser struct {
mock.Mock
}
// Parse provides a mock function with given fields: ctx, _a1, manifest
func (_m *Parser) Parse(ctx context.Context, _a1 *artifact.Artifact, manifest []byte) error {
ret := _m.Called(ctx, _a1, manifest)
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, *artifact.Artifact, []byte) error); ok {
r0 = rf(ctx, _a1, manifest)
} else {
r0 = ret.Error(0)
}
return r0
}

View File

@ -0,0 +1,85 @@
// Code generated by mockery v2.1.0. DO NOT EDIT.
package processor
import (
context "context"
artifact "github.com/goharbor/harbor/src/pkg/artifact"
mock "github.com/stretchr/testify/mock"
processor "github.com/goharbor/harbor/src/controller/artifact/processor"
)
// Processor is an autogenerated mock type for the Processor type
type Processor struct {
mock.Mock
}
// AbstractAddition provides a mock function with given fields: ctx, _a1, additionType
func (_m *Processor) AbstractAddition(ctx context.Context, _a1 *artifact.Artifact, additionType string) (*processor.Addition, error) {
ret := _m.Called(ctx, _a1, additionType)
var r0 *processor.Addition
if rf, ok := ret.Get(0).(func(context.Context, *artifact.Artifact, string) *processor.Addition); ok {
r0 = rf(ctx, _a1, additionType)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*processor.Addition)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(context.Context, *artifact.Artifact, string) error); ok {
r1 = rf(ctx, _a1, additionType)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// AbstractMetadata provides a mock function with given fields: ctx, _a1, manifest
func (_m *Processor) AbstractMetadata(ctx context.Context, _a1 *artifact.Artifact, manifest []byte) error {
ret := _m.Called(ctx, _a1, manifest)
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, *artifact.Artifact, []byte) error); ok {
r0 = rf(ctx, _a1, manifest)
} else {
r0 = ret.Error(0)
}
return r0
}
// GetArtifactType provides a mock function with given fields: ctx, _a1
func (_m *Processor) GetArtifactType(ctx context.Context, _a1 *artifact.Artifact) string {
ret := _m.Called(ctx, _a1)
var r0 string
if rf, ok := ret.Get(0).(func(context.Context, *artifact.Artifact) string); ok {
r0 = rf(ctx, _a1)
} else {
r0 = ret.Get(0).(string)
}
return r0
}
// ListAdditionTypes provides a mock function with given fields: ctx, _a1
func (_m *Processor) ListAdditionTypes(ctx context.Context, _a1 *artifact.Artifact) []string {
ret := _m.Called(ctx, _a1)
var r0 []string
if rf, ok := ret.Get(0).(func(context.Context, *artifact.Artifact) []string); ok {
r0 = rf(ctx, _a1)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]string)
}
}
return r0
}