Expose conversion code as package (#1)

This commit is contained in:
Jacob Scherffenberg 2022-06-15 11:02:05 +02:00 коммит произвёл JacobSMoller
Родитель f55a7d25cf
Коммит 144a04e6c9
11 изменённых файлов: 559 добавлений и 539 удалений

4
.github/workflows/go.yml поставляемый
Просмотреть файл

@ -17,12 +17,12 @@ jobs:
uses: actions/setup-go@v2
with:
go-version: '^1.15'
- name: Checkout code
uses: actions/checkout@v2
- name: Install protoc
uses: arduino/setup-protoc@v1
- name: Install protoc-gen-go
run: go get -u github.com/golang/protobuf/protoc-gen-go
- name: Checkout code
uses: actions/checkout@v2
# Formatting, go mod tidy, and re-generate proto extension code
- name: Run go fmt on all modules

Просмотреть файл

@ -8,7 +8,7 @@ So you can reuse existing data definitions in .proto for BigQuery with this plug
## Installation
```sh
go get github.com/GoogleCloudPlatform/protoc-gen-bq-schema
go install github.com/GoogleCloudPlatform/protoc-gen-bq-schema@latest
```
## Usage
@ -74,8 +74,8 @@ The message `foo.Baz` is also ignored because it is not the first message in the
### Support for PolicyTags
`protoc-gen-bq-schema` now supports [policyTags](https://cloud.google.com/bigquery/docs/column-level-security-intro).
You can define a `Policy Tag` for a field in `.proto` file.
`protoc-gen-bq-schema` now supports [policyTags](https://cloud.google.com/bigquery/docs/column-level-security-intro).
You can define a `Policy Tag` for a field in `.proto` file.
### Example with Policy Tags
Suppose that you have the following `test_table.proto`
@ -94,7 +94,7 @@ message TestTable{
policy_tags : "private"
}
];
string b = 2 [(gen_bq_schema.bigquery).policy_tags="public"];
message Nested {
@ -165,7 +165,7 @@ It will generate the following `JSON` schema
]
```
The policy tag name provided in `test_table.proto` file is taken as it is. According to [Google Docs](https://cloud.google.com/bigquery/docs/column-level-security-intro),
The policy tag name provided in `test_table.proto` file is taken as it is. According to [Google Docs](https://cloud.google.com/bigquery/docs/column-level-security-intro),
the policy tag string should be of the following format
`projects/project-id/locations/location/taxonomies/taxonomy-id/policyTags/policytag-id`

2
go.mod
Просмотреть файл

@ -5,5 +5,5 @@ go 1.15
require (
github.com/golang/glog v1.0.0
github.com/golang/protobuf v1.5.2
google.golang.org/protobuf v1.26.0
google.golang.org/protobuf v1.28.0
)

3
go.sum
Просмотреть файл

@ -8,5 +8,6 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=

520
main.go
Просмотреть файл

@ -25,537 +25,21 @@
package main
import (
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"strings"
"github.com/GoogleCloudPlatform/protoc-gen-bq-schema/protos"
"github.com/GoogleCloudPlatform/protoc-gen-bq-schema/pkg/converter"
"github.com/golang/glog"
plugin "github.com/golang/protobuf/protoc-gen-go/plugin"
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/proto"
descriptor "google.golang.org/protobuf/types/descriptorpb"
)
var (
globalPkg = &ProtoPackage{
name: "",
parent: nil,
children: make(map[string]*ProtoPackage),
types: make(map[string]*descriptor.DescriptorProto),
comments: make(map[string]Comments),
path: make(map[string]string),
}
)
// Field describes the schema of a field in BigQuery.
type Field struct {
Name string `json:"name"`
Type string `json:"type"`
Mode string `json:"mode"`
Description string `json:"description,omitempty"`
Fields []*Field `json:"fields,omitempty"`
PolicyTags *PolicyTags `json:"policyTags,omitempty"`
}
// PolicyTags describes the structure of a Policy Tag
type PolicyTags struct {
Names []string `json:"names,omitempty"`
}
// ProtoPackage describes a package of Protobuf, which is an container of message types.
type ProtoPackage struct {
name string
parent *ProtoPackage
children map[string]*ProtoPackage
types map[string]*descriptor.DescriptorProto
comments map[string]Comments
path map[string]string
}
func registerType(pkgName *string, msg *descriptor.DescriptorProto, comments Comments, path string) {
pkg := globalPkg
if pkgName != nil {
for _, node := range strings.Split(*pkgName, ".") {
if pkg == globalPkg && node == "" {
// Skips leading "."
continue
}
child, ok := pkg.children[node]
if !ok {
child = &ProtoPackage{
name: pkg.name + "." + node,
parent: pkg,
children: make(map[string]*ProtoPackage),
types: make(map[string]*descriptor.DescriptorProto),
comments: make(map[string]Comments),
path: make(map[string]string),
}
pkg.children[node] = child
}
pkg = child
}
}
pkg.types[msg.GetName()] = msg
pkg.comments[msg.GetName()] = comments
pkg.path[msg.GetName()] = path
}
func (pkg *ProtoPackage) lookupType(name string) (*descriptor.DescriptorProto, bool, Comments, string) {
if strings.HasPrefix(name, ".") {
return globalPkg.relativelyLookupType(name[1:len(name)])
}
for ; pkg != nil; pkg = pkg.parent {
if desc, ok, comments, path := pkg.relativelyLookupType(name); ok {
return desc, ok, comments, path
}
}
return nil, false, Comments{}, ""
}
func relativelyLookupNestedType(desc *descriptor.DescriptorProto, name string) (*descriptor.DescriptorProto, bool, string) {
components := strings.Split(name, ".")
path := ""
componentLoop:
for _, component := range components {
for nestedIndex, nested := range desc.GetNestedType() {
if nested.GetName() == component {
desc = nested
path = fmt.Sprintf("%s.%d.%d", path, subMessagePath, nestedIndex)
continue componentLoop
}
}
glog.Infof("no such nested message %s in %s", component, desc.GetName())
return nil, false, ""
}
return desc, true, strings.Trim(path, ".")
}
func (pkg *ProtoPackage) relativelyLookupType(name string) (*descriptor.DescriptorProto, bool, Comments, string) {
components := strings.SplitN(name, ".", 2)
switch len(components) {
case 0:
glog.V(1).Info("empty message name")
return nil, false, Comments{}, ""
case 1:
found, ok := pkg.types[components[0]]
return found, ok, pkg.comments[components[0]], pkg.path[components[0]]
case 2:
glog.Infof("looking for %s in %s at %s (%v)", components[1], components[0], pkg.name, pkg)
if child, ok := pkg.children[components[0]]; ok {
found, ok, comments, path := child.relativelyLookupType(components[1])
return found, ok, comments, path
}
if msg, ok := pkg.types[components[0]]; ok {
found, ok, path := relativelyLookupNestedType(msg, components[1])
return found, ok, pkg.comments[components[0]], pkg.path[components[0]] + "." + path
}
glog.V(1).Infof("no such package nor message %s in %s", components[0], pkg.name)
return nil, false, Comments{}, ""
default:
glog.Fatal("not reached")
return nil, false, Comments{}, ""
}
}
func (pkg *ProtoPackage) relativelyLookupPackage(name string) (*ProtoPackage, bool) {
components := strings.Split(name, ".")
for _, c := range components {
var ok bool
pkg, ok = pkg.children[c]
if !ok {
return nil, false
}
}
return pkg, true
}
var (
typeFromWKT = map[string]string{
".google.protobuf.Int32Value": "INTEGER",
".google.protobuf.Int64Value": "INTEGER",
".google.protobuf.UInt32Value": "INTEGER",
".google.protobuf.UInt64Value": "INTEGER",
".google.protobuf.DoubleValue": "FLOAT",
".google.protobuf.FloatValue": "FLOAT",
".google.protobuf.BoolValue": "BOOLEAN",
".google.protobuf.StringValue": "STRING",
".google.protobuf.BytesValue": "BYTES",
".google.protobuf.Duration": "STRING",
".google.protobuf.Timestamp": "TIMESTAMP",
}
typeFromFieldType = map[descriptor.FieldDescriptorProto_Type]string{
descriptor.FieldDescriptorProto_TYPE_DOUBLE: "FLOAT",
descriptor.FieldDescriptorProto_TYPE_FLOAT: "FLOAT",
descriptor.FieldDescriptorProto_TYPE_INT64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_UINT64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_INT32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_UINT32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_FIXED64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_FIXED32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SFIXED32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SFIXED64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SINT32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SINT64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_STRING: "STRING",
descriptor.FieldDescriptorProto_TYPE_BYTES: "BYTES",
descriptor.FieldDescriptorProto_TYPE_ENUM: "STRING",
descriptor.FieldDescriptorProto_TYPE_BOOL: "BOOLEAN",
descriptor.FieldDescriptorProto_TYPE_GROUP: "RECORD",
descriptor.FieldDescriptorProto_TYPE_MESSAGE: "RECORD",
}
modeFromFieldLabel = map[descriptor.FieldDescriptorProto_Label]string{
descriptor.FieldDescriptorProto_LABEL_OPTIONAL: "NULLABLE",
descriptor.FieldDescriptorProto_LABEL_REQUIRED: "REQUIRED",
descriptor.FieldDescriptorProto_LABEL_REPEATED: "REPEATED",
}
)
func convertField(
curPkg *ProtoPackage,
desc *descriptor.FieldDescriptorProto,
msgOpts *protos.BigQueryMessageOptions,
parentMessages map[*descriptor.DescriptorProto]bool,
comments Comments,
path string) (*Field, error) {
field := &Field{
Name: desc.GetName(),
}
if msgOpts.GetUseJsonNames() && desc.GetJsonName() != "" {
field.Name = desc.GetJsonName()
}
var ok bool
field.Mode, ok = modeFromFieldLabel[desc.GetLabel()]
if !ok {
return nil, fmt.Errorf("unrecognized field label: %s", desc.GetLabel().String())
}
field.Type, ok = typeFromFieldType[desc.GetType()]
if !ok {
return nil, fmt.Errorf("unrecognized field type: %s", desc.GetType().String())
}
if comment := comments.Get(path); comment != "" {
field.Description = comment
}
opts := desc.GetOptions()
if opts != nil && proto.HasExtension(opts, protos.E_Bigquery) {
opt := proto.GetExtension(opts, protos.E_Bigquery).(*protos.BigQueryFieldOptions)
if opt.Ignore {
// skip the field below
return nil, nil
}
if opt.Require {
field.Mode = "REQUIRED"
}
if len(opt.TypeOverride) > 0 {
field.Type = opt.TypeOverride
}
if len(opt.Name) > 0 {
field.Name = opt.Name
}
if len(opt.Description) > 0 {
field.Description = opt.Description
}
if len(opt.PolicyTags) > 0 {
field.PolicyTags = &PolicyTags{
Names: []string{opt.PolicyTags},
}
}
}
if field.Type != "RECORD" {
return field, nil
}
if t, ok := typeFromWKT[desc.GetTypeName()]; ok {
field.Type = t
return field, nil
}
fields, err := convertFieldsForType(curPkg, desc.GetTypeName(), parentMessages)
if err != nil {
return nil, err
}
if len(fields) == 0 { // discard RECORDs that would have zero fields
return nil, nil
}
field.Fields = fields
return field, nil
}
func convertExtraField(curPkg *ProtoPackage, extraFieldDefinition string, parentMessages map[*descriptor.DescriptorProto]bool) (*Field, error) {
parts := strings.Split(extraFieldDefinition, ":")
if len(parts) < 2 {
return nil, fmt.Errorf("expecting at least 2 parts in extra field definition separated by colon, got %d", len(parts))
}
field := &Field{
Name: parts[0],
Type: parts[1],
Mode: "NULLABLE",
}
modeIndex := 2
if field.Type == "RECORD" {
modeIndex = 3
}
if len(parts) > modeIndex {
field.Mode = parts[modeIndex]
}
if field.Type != "RECORD" {
return field, nil
}
if len(parts) < 3 {
return nil, fmt.Errorf("extra field %s has no type defined", field.Type)
}
typeName := parts[2]
if t, ok := typeFromWKT[typeName]; ok {
field.Type = t
return field, nil
}
fields, err := convertFieldsForType(curPkg, typeName, parentMessages)
if err != nil {
return nil, err
}
if len(fields) == 0 { // discard RECORDs that would have zero fields
return nil, nil
}
field.Fields = fields
return field, nil
}
func convertFieldsForType(curPkg *ProtoPackage,
typeName string,
parentMessages map[*descriptor.DescriptorProto]bool) ([]*Field, error) {
recordType, ok, comments, path := curPkg.lookupType(typeName)
if !ok {
return nil, fmt.Errorf("no such message type named %s", typeName)
}
fieldMsgOpts, err := getBigqueryMessageOptions(recordType)
if err != nil {
return nil, err
}
return convertMessageType(curPkg, recordType, fieldMsgOpts, parentMessages, comments, path)
}
func convertMessageType(
curPkg *ProtoPackage,
msg *descriptor.DescriptorProto,
opts *protos.BigQueryMessageOptions,
parentMessages map[*descriptor.DescriptorProto]bool,
comments Comments,
path string) (schema []*Field, err error) {
if parentMessages[msg] {
glog.Infof("Detected recursion for message %s, ignoring subfields", *msg.Name)
return
}
if glog.V(4) {
glog.Info("Converting message: ", prototext.Format(msg))
}
parentMessages[msg] = true
for fieldIndex, fieldDesc := range msg.GetField() {
fieldCommentPath := fmt.Sprintf("%s.%d.%d", path, fieldPath, fieldIndex)
field, err := convertField(curPkg, fieldDesc, opts, parentMessages, comments, fieldCommentPath)
if err != nil {
glog.Errorf("Failed to convert field %s in %s: %v", fieldDesc.GetName(), msg.GetName(), err)
return nil, err
}
// if we got no error and the field is nil, skip it
if field != nil {
schema = append(schema, field)
}
}
for _, extraField := range opts.GetExtraFields() {
field, err := convertExtraField(curPkg, extraField, parentMessages)
if err != nil {
glog.Errorf("Failed to convert extra field %s in %s: %v", extraField, msg.GetName(), err)
return nil, err
}
schema = append(schema, field)
}
parentMessages[msg] = false
return
}
func convertFile(file *descriptor.FileDescriptorProto) ([]*plugin.CodeGeneratorResponse_File, error) {
name := path.Base(file.GetName())
pkg, ok := globalPkg.relativelyLookupPackage(file.GetPackage())
if !ok {
return nil, fmt.Errorf("no such package found: %s", file.GetPackage())
}
comments := ParseComments(file)
response := []*plugin.CodeGeneratorResponse_File{}
for msgIndex, msg := range file.GetMessageType() {
path := fmt.Sprintf("%d.%d", messagePath, msgIndex)
opts, err := getBigqueryMessageOptions(msg)
if err != nil {
return nil, err
}
if opts == nil {
continue
}
tableName := opts.GetTableName()
if len(tableName) == 0 {
continue
}
glog.V(2).Info("Generating schema for a message type ", msg.GetName())
schema, err := convertMessageType(pkg, msg, opts, make(map[*descriptor.DescriptorProto]bool), comments, path)
if err != nil {
glog.Errorf("Failed to convert %s: %v", name, err)
return nil, err
}
jsonSchema, err := json.MarshalIndent(schema, "", " ")
if err != nil {
glog.Error("Failed to encode schema", err)
return nil, err
}
resFile := &plugin.CodeGeneratorResponse_File{
Name: proto.String(fmt.Sprintf("%s/%s.schema", strings.Replace(file.GetPackage(), ".", "/", -1), tableName)),
Content: proto.String(string(jsonSchema)),
}
response = append(response, resFile)
}
return response, nil
}
// getBigqueryMessageOptions returns the bigquery options for the given message.
// If an error is encountered, it is returned instead. If no error occurs, but
// the message has no gen_bq_schema.bigquery_opts option, this function returns
// nil, nil.
func getBigqueryMessageOptions(msg *descriptor.DescriptorProto) (*protos.BigQueryMessageOptions, error) {
options := msg.GetOptions()
if options == nil {
return nil, nil
}
if !proto.HasExtension(options, protos.E_BigqueryOpts) {
return nil, nil
}
return proto.GetExtension(options, protos.E_BigqueryOpts).(*protos.BigQueryMessageOptions), nil
}
// handleSingleMessageOpt handles --bq-schema_opt=single-message in protoc params.
// providing that param tells protoc-gen-bq-schema to treat each proto files only contains one top-level type.
// if a file contains no message types, then this function simply does nothing.
// if a file contains more than one message types, then only the first message type will be processed.
// in that case, the table names will follow the proto file names.
func handleSingleMessageOpt(file *descriptor.FileDescriptorProto, requestParam string) {
if !strings.Contains(requestParam, "single-message") || len(file.GetMessageType()) == 0 {
return
}
file.MessageType = file.GetMessageType()[:1]
message := file.GetMessageType()[0]
message.Options = &descriptor.MessageOptions{}
fileName := file.GetName()
proto.SetExtension(message.GetOptions(), protos.E_BigqueryOpts, &protos.BigQueryMessageOptions{
TableName: fileName[strings.LastIndexByte(fileName, '/')+1 : strings.LastIndexByte(fileName, '.')],
})
}
func convert(req *plugin.CodeGeneratorRequest) (*plugin.CodeGeneratorResponse, error) {
generateTargets := make(map[string]bool)
for _, file := range req.GetFileToGenerate() {
generateTargets[file] = true
}
res := &plugin.CodeGeneratorResponse{}
for _, file := range req.GetProtoFile() {
for msgIndex, msg := range file.GetMessageType() {
glog.V(1).Infof("Loading a message type %s from package %s", msg.GetName(), file.GetPackage())
registerType(file.Package, msg, ParseComments(file), fmt.Sprintf("%d.%d", messagePath, msgIndex))
}
}
for _, file := range req.GetProtoFile() {
if _, ok := generateTargets[file.GetName()]; ok {
glog.V(1).Info("Converting ", file.GetName())
handleSingleMessageOpt(file, req.GetParameter())
converted, err := convertFile(file)
if err != nil {
res.Error = proto.String(fmt.Sprintf("Failed to convert %s: %v", file.GetName(), err))
return res, err
}
res.File = append(res.File, converted...)
}
}
return res, nil
}
func convertFrom(rd io.Reader) (*plugin.CodeGeneratorResponse, error) {
glog.V(1).Info("Reading code generation request")
input, err := ioutil.ReadAll(rd)
if err != nil {
glog.Error("Failed to read request:", err)
return nil, err
}
req := &plugin.CodeGeneratorRequest{}
err = proto.Unmarshal(input, req)
if err != nil {
glog.Error("Can't unmarshal input:", err)
return nil, err
}
glog.V(1).Info("Converting input")
return convert(req)
}
func main() {
flag.Parse()
ok := true
glog.Info("Processing code generator request")
res, err := convertFrom(os.Stdin)
res, err := converter.ConvertFrom(os.Stdin)
if err != nil {
ok = false
if res == nil {

Просмотреть файл

@ -1,4 +1,4 @@
package main
package converter
import (
"strconv"

Просмотреть файл

@ -1,10 +1,10 @@
package main
package converter
import (
"reflect"
"testing"
"github.com/golang/protobuf/protoc-gen-go/descriptor"
descriptor "google.golang.org/protobuf/types/descriptorpb"
)
func TestParseComments(t *testing.T) {
@ -16,12 +16,12 @@ func TestParseComments(t *testing.T) {
&descriptor.FileDescriptorProto{
SourceCodeInfo: &descriptor.SourceCodeInfo{
Location: []*descriptor.SourceCodeInfo_Location{
&descriptor.SourceCodeInfo_Location{
{
Path: []int32{4, 0},
LeadingComments: &leadingComment,
TrailingComments: &trailingComment,
},
&descriptor.SourceCodeInfo_Location{
{
Path: []int32{4, 0, 3, 0, 2, 0},
LeadingComments: &subMessageFieldLeadingComment,
TrailingComments: nil,
@ -46,7 +46,7 @@ func TestParseCommentsWithoutComments(t *testing.T) {
&descriptor.FileDescriptorProto{
SourceCodeInfo: &descriptor.SourceCodeInfo{
Location: []*descriptor.SourceCodeInfo_Location{
&descriptor.SourceCodeInfo_Location{
{
Path: []int32{4, 0},
},
},
@ -67,7 +67,7 @@ func TestCommentsGet(t *testing.T) {
&descriptor.FileDescriptorProto{
SourceCodeInfo: &descriptor.SourceCodeInfo{
Location: []*descriptor.SourceCodeInfo_Location{
&descriptor.SourceCodeInfo_Location{
{
Path: []int32{4, 0},
LeadingComments: &comment,
},

434
pkg/converter/convert.go Normal file
Просмотреть файл

@ -0,0 +1,434 @@
package converter
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"path"
"strings"
"github.com/GoogleCloudPlatform/protoc-gen-bq-schema/protos"
"github.com/golang/glog"
plugin "github.com/golang/protobuf/protoc-gen-go/plugin"
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/proto"
descriptor "google.golang.org/protobuf/types/descriptorpb"
)
var (
typeFromWKT = map[string]string{
".google.protobuf.Int32Value": "INTEGER",
".google.protobuf.Int64Value": "INTEGER",
".google.protobuf.UInt32Value": "INTEGER",
".google.protobuf.UInt64Value": "INTEGER",
".google.protobuf.DoubleValue": "FLOAT",
".google.protobuf.FloatValue": "FLOAT",
".google.protobuf.BoolValue": "BOOLEAN",
".google.protobuf.StringValue": "STRING",
".google.protobuf.BytesValue": "BYTES",
".google.protobuf.Duration": "STRING",
".google.protobuf.Timestamp": "TIMESTAMP",
}
typeFromFieldType = map[descriptor.FieldDescriptorProto_Type]string{
descriptor.FieldDescriptorProto_TYPE_DOUBLE: "FLOAT",
descriptor.FieldDescriptorProto_TYPE_FLOAT: "FLOAT",
descriptor.FieldDescriptorProto_TYPE_INT64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_UINT64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_INT32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_UINT32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_FIXED64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_FIXED32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SFIXED32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SFIXED64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SINT32: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_SINT64: "INTEGER",
descriptor.FieldDescriptorProto_TYPE_STRING: "STRING",
descriptor.FieldDescriptorProto_TYPE_BYTES: "BYTES",
descriptor.FieldDescriptorProto_TYPE_ENUM: "STRING",
descriptor.FieldDescriptorProto_TYPE_BOOL: "BOOLEAN",
descriptor.FieldDescriptorProto_TYPE_GROUP: "RECORD",
descriptor.FieldDescriptorProto_TYPE_MESSAGE: "RECORD",
}
modeFromFieldLabel = map[descriptor.FieldDescriptorProto_Label]string{
descriptor.FieldDescriptorProto_LABEL_OPTIONAL: "NULLABLE",
descriptor.FieldDescriptorProto_LABEL_REQUIRED: "REQUIRED",
descriptor.FieldDescriptorProto_LABEL_REPEATED: "REPEATED",
}
)
// Field describes the schema of a field in BigQuery.
type Field struct {
Name string `json:"name"`
Type string `json:"type"`
Mode string `json:"mode"`
Description string `json:"description,omitempty"`
Fields []*Field `json:"fields,omitempty"`
PolicyTags *PolicyTags `json:"policyTags,omitempty"`
}
// PolicyTags describes the structure of a Policy Tag
type PolicyTags struct {
Names []string `json:"names,omitempty"`
}
func registerType(pkgName *string, msg *descriptor.DescriptorProto, comments Comments, path string) {
pkg := globalPkg
if pkgName != nil {
for _, node := range strings.Split(*pkgName, ".") {
if pkg == globalPkg && node == "" {
// Skips leading "."
continue
}
child, ok := pkg.children[node]
if !ok {
child = &ProtoPackage{
name: pkg.name + "." + node,
parent: pkg,
children: make(map[string]*ProtoPackage),
types: make(map[string]*descriptor.DescriptorProto),
comments: make(map[string]Comments),
path: make(map[string]string),
}
pkg.children[node] = child
}
pkg = child
}
}
pkg.types[msg.GetName()] = msg
pkg.comments[msg.GetName()] = comments
pkg.path[msg.GetName()] = path
}
func convertField(
curPkg *ProtoPackage,
desc *descriptor.FieldDescriptorProto,
msgOpts *protos.BigQueryMessageOptions,
parentMessages map[*descriptor.DescriptorProto]bool,
comments Comments,
path string) (*Field, error) {
field := &Field{
Name: desc.GetName(),
}
if msgOpts.GetUseJsonNames() && desc.GetJsonName() != "" {
field.Name = desc.GetJsonName()
}
var ok bool
field.Mode, ok = modeFromFieldLabel[desc.GetLabel()]
if !ok {
return nil, fmt.Errorf("unrecognized field label: %s", desc.GetLabel().String())
}
field.Type, ok = typeFromFieldType[desc.GetType()]
if !ok {
return nil, fmt.Errorf("unrecognized field type: %s", desc.GetType().String())
}
if comment := comments.Get(path); comment != "" {
field.Description = comment
}
opts := desc.GetOptions()
if opts != nil && proto.HasExtension(opts, protos.E_Bigquery) {
opt := proto.GetExtension(opts, protos.E_Bigquery).(*protos.BigQueryFieldOptions)
if opt.Ignore {
// skip the field below
return nil, nil
}
if opt.Require {
field.Mode = "REQUIRED"
}
if len(opt.TypeOverride) > 0 {
field.Type = opt.TypeOverride
}
if len(opt.Name) > 0 {
field.Name = opt.Name
}
if len(opt.Description) > 0 {
field.Description = opt.Description
}
if len(opt.PolicyTags) > 0 {
field.PolicyTags = &PolicyTags{
Names: []string{opt.PolicyTags},
}
}
}
if field.Type != "RECORD" {
return field, nil
}
if t, ok := typeFromWKT[desc.GetTypeName()]; ok {
field.Type = t
return field, nil
}
fields, err := convertFieldsForType(curPkg, desc.GetTypeName(), parentMessages)
if err != nil {
return nil, err
}
if len(fields) == 0 { // discard RECORDs that would have zero fields
return nil, nil
}
field.Fields = fields
return field, nil
}
func convertExtraField(curPkg *ProtoPackage, extraFieldDefinition string, parentMessages map[*descriptor.DescriptorProto]bool) (*Field, error) {
parts := strings.Split(extraFieldDefinition, ":")
if len(parts) < 2 {
return nil, fmt.Errorf("expecting at least 2 parts in extra field definition separated by colon, got %d", len(parts))
}
field := &Field{
Name: parts[0],
Type: parts[1],
Mode: "NULLABLE",
}
modeIndex := 2
if field.Type == "RECORD" {
modeIndex = 3
}
if len(parts) > modeIndex {
field.Mode = parts[modeIndex]
}
if field.Type != "RECORD" {
return field, nil
}
if len(parts) < 3 {
return nil, fmt.Errorf("extra field %s has no type defined", field.Type)
}
typeName := parts[2]
if t, ok := typeFromWKT[typeName]; ok {
field.Type = t
return field, nil
}
fields, err := convertFieldsForType(curPkg, typeName, parentMessages)
if err != nil {
return nil, err
}
if len(fields) == 0 { // discard RECORDs that would have zero fields
return nil, nil
}
field.Fields = fields
return field, nil
}
func convertFieldsForType(curPkg *ProtoPackage,
typeName string,
parentMessages map[*descriptor.DescriptorProto]bool) ([]*Field, error) {
recordType, ok, comments, path := curPkg.lookupType(typeName)
if !ok {
return nil, fmt.Errorf("no such message type named %s", typeName)
}
fieldMsgOpts, err := getBigqueryMessageOptions(recordType)
if err != nil {
return nil, err
}
return convertMessageType(curPkg, recordType, fieldMsgOpts, parentMessages, comments, path)
}
func convertMessageType(
curPkg *ProtoPackage,
msg *descriptor.DescriptorProto,
opts *protos.BigQueryMessageOptions,
parentMessages map[*descriptor.DescriptorProto]bool,
comments Comments,
path string) (schema []*Field, err error) {
if parentMessages[msg] {
glog.Infof("Detected recursion for message %s, ignoring subfields", *msg.Name)
return
}
if glog.V(4) {
glog.Info("Converting message: ", prototext.Format(msg))
}
parentMessages[msg] = true
for fieldIndex, fieldDesc := range msg.GetField() {
fieldCommentPath := fmt.Sprintf("%s.%d.%d", path, fieldPath, fieldIndex)
field, err := convertField(curPkg, fieldDesc, opts, parentMessages, comments, fieldCommentPath)
if err != nil {
glog.Errorf("Failed to convert field %s in %s: %v", fieldDesc.GetName(), msg.GetName(), err)
return nil, err
}
// if we got no error and the field is nil, skip it
if field != nil {
schema = append(schema, field)
}
}
for _, extraField := range opts.GetExtraFields() {
field, err := convertExtraField(curPkg, extraField, parentMessages)
if err != nil {
glog.Errorf("Failed to convert extra field %s in %s: %v", extraField, msg.GetName(), err)
return nil, err
}
schema = append(schema, field)
}
parentMessages[msg] = false
return
}
func convertFile(file *descriptor.FileDescriptorProto) ([]*plugin.CodeGeneratorResponse_File, error) {
name := path.Base(file.GetName())
pkg, ok := globalPkg.relativelyLookupPackage(file.GetPackage())
if !ok {
return nil, fmt.Errorf("no such package found: %s", file.GetPackage())
}
comments := ParseComments(file)
response := []*plugin.CodeGeneratorResponse_File{}
for msgIndex, msg := range file.GetMessageType() {
path := fmt.Sprintf("%d.%d", messagePath, msgIndex)
opts, err := getBigqueryMessageOptions(msg)
if err != nil {
return nil, err
}
if opts == nil {
continue
}
tableName := opts.GetTableName()
if len(tableName) == 0 {
continue
}
glog.V(2).Info("Generating schema for a message type ", msg.GetName())
schema, err := convertMessageType(pkg, msg, opts, make(map[*descriptor.DescriptorProto]bool), comments, path)
if err != nil {
glog.Errorf("Failed to convert %s: %v", name, err)
return nil, err
}
jsonSchema, err := json.MarshalIndent(schema, "", " ")
if err != nil {
glog.Error("Failed to encode schema", err)
return nil, err
}
resFile := &plugin.CodeGeneratorResponse_File{
Name: proto.String(fmt.Sprintf("%s/%s.schema", strings.Replace(file.GetPackage(), ".", "/", -1), tableName)),
Content: proto.String(string(jsonSchema)),
}
response = append(response, resFile)
}
return response, nil
}
// getBigqueryMessageOptions returns the bigquery options for the given message.
// If an error is encountered, it is returned instead. If no error occurs, but
// the message has no gen_bq_schema.bigquery_opts option, this function returns
// nil, nil.
func getBigqueryMessageOptions(msg *descriptor.DescriptorProto) (*protos.BigQueryMessageOptions, error) {
options := msg.GetOptions()
if options == nil {
return nil, nil
}
if !proto.HasExtension(options, protos.E_BigqueryOpts) {
return nil, nil
}
return proto.GetExtension(options, protos.E_BigqueryOpts).(*protos.BigQueryMessageOptions), nil
}
// handleSingleMessageOpt handles --bq-schema_opt=single-message in protoc params.
// providing that param tells protoc-gen-bq-schema to treat each proto files only contains one top-level type.
// if a file contains no message types, then this function simply does nothing.
// if a file contains more than one message types, then only the first message type will be processed.
// in that case, the table names will follow the proto file names.
func handleSingleMessageOpt(file *descriptor.FileDescriptorProto, requestParam string) {
if !strings.Contains(requestParam, "single-message") || len(file.GetMessageType()) == 0 {
return
}
file.MessageType = file.GetMessageType()[:1]
message := file.GetMessageType()[0]
message.Options = &descriptor.MessageOptions{}
fileName := file.GetName()
proto.SetExtension(message.GetOptions(), protos.E_BigqueryOpts, &protos.BigQueryMessageOptions{
TableName: fileName[strings.LastIndexByte(fileName, '/')+1 : strings.LastIndexByte(fileName, '.')],
})
}
func Convert(req *plugin.CodeGeneratorRequest) (*plugin.CodeGeneratorResponse, error) {
generateTargets := make(map[string]bool)
for _, file := range req.GetFileToGenerate() {
generateTargets[file] = true
}
res := &plugin.CodeGeneratorResponse{}
for _, file := range req.GetProtoFile() {
for msgIndex, msg := range file.GetMessageType() {
glog.V(1).Infof("Loading a message type %s from package %s", msg.GetName(), file.GetPackage())
registerType(file.Package, msg, ParseComments(file), fmt.Sprintf("%d.%d", messagePath, msgIndex))
}
}
for _, file := range req.GetProtoFile() {
if _, ok := generateTargets[file.GetName()]; ok {
glog.V(1).Info("Converting ", file.GetName())
handleSingleMessageOpt(file, req.GetParameter())
converted, err := convertFile(file)
if err != nil {
res.Error = proto.String(fmt.Sprintf("Failed to convert %s: %v", file.GetName(), err))
return res, err
}
res.File = append(res.File, converted...)
}
}
return res, nil
}
// ConvertFrom converts input from protoc to a CodeGeneratorRequest and starts conversion
// Returning a CodeGeneratorResponse containing either an error or the results of converting the given proto
func ConvertFrom(rd io.Reader) (*plugin.CodeGeneratorResponse, error) {
glog.V(1).Info("Reading code generation request")
input, err := ioutil.ReadAll(rd)
if err != nil {
glog.Error("Failed to read request:", err)
return nil, err
}
req := &plugin.CodeGeneratorRequest{}
err = proto.Unmarshal(input, req)
if err != nil {
glog.Error("Can't unmarshal input:", err)
return nil, err
}
glog.V(1).Info("Converting input")
return Convert(req)
}

Просмотреть файл

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package main
package converter
import (
"testing"

Просмотреть файл

@ -12,15 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package main
package converter
import (
"encoding/json"
"reflect"
"testing"
"github.com/golang/protobuf/proto"
plugin "github.com/golang/protobuf/protoc-gen-go/plugin"
"google.golang.org/protobuf/encoding/prototext"
)
// schema is an internal representation of generated BigQuery schema
@ -37,7 +37,7 @@ func joinNames(targets map[string]*schema) (result string) {
func testConvert(t *testing.T, input string, expectedOutputs map[string]string, extras ...func(request *plugin.CodeGeneratorRequest)) {
req := plugin.CodeGeneratorRequest{}
if err := proto.UnmarshalText(input, &req); err != nil {
if err := prototext.Unmarshal([]byte(input), &req); err != nil {
t.Fatal("Failed to parse test input: ", err)
}
@ -55,7 +55,7 @@ func testConvert(t *testing.T, input string, expectedOutputs map[string]string,
expectedSchema[filename] = parsed
}
res, err := convert(&req)
res, err := Convert(&req)
if err != nil {
t.Fatal("Conversion failed. ", err)
}

Просмотреть файл

@ -0,0 +1,101 @@
package converter
import (
"fmt"
"strings"
"github.com/golang/glog"
descriptor "google.golang.org/protobuf/types/descriptorpb"
)
var (
globalPkg = &ProtoPackage{
name: "",
parent: nil,
children: make(map[string]*ProtoPackage),
types: make(map[string]*descriptor.DescriptorProto),
comments: make(map[string]Comments),
path: make(map[string]string),
}
)
// ProtoPackage describes a package of Protobuf, which is an container of message types.
type ProtoPackage struct {
name string
parent *ProtoPackage
children map[string]*ProtoPackage
types map[string]*descriptor.DescriptorProto
comments map[string]Comments
path map[string]string
}
func (pkg *ProtoPackage) lookupType(name string) (*descriptor.DescriptorProto, bool, Comments, string) {
if strings.HasPrefix(name, ".") {
return globalPkg.relativelyLookupType(name[1:])
}
for ; pkg != nil; pkg = pkg.parent {
if desc, ok, comments, path := pkg.relativelyLookupType(name); ok {
return desc, ok, comments, path
}
}
return nil, false, Comments{}, ""
}
func relativelyLookupNestedType(desc *descriptor.DescriptorProto, name string) (*descriptor.DescriptorProto, bool, string) {
components := strings.Split(name, ".")
path := ""
componentLoop:
for _, component := range components {
for nestedIndex, nested := range desc.GetNestedType() {
if nested.GetName() == component {
desc = nested
path = fmt.Sprintf("%s.%d.%d", path, subMessagePath, nestedIndex)
continue componentLoop
}
}
glog.Infof("no such nested message %s in %s", component, desc.GetName())
return nil, false, ""
}
return desc, true, strings.Trim(path, ".")
}
func (pkg *ProtoPackage) relativelyLookupType(name string) (*descriptor.DescriptorProto, bool, Comments, string) {
components := strings.SplitN(name, ".", 2)
switch len(components) {
case 0:
glog.V(1).Info("empty message name")
return nil, false, Comments{}, ""
case 1:
found, ok := pkg.types[components[0]]
return found, ok, pkg.comments[components[0]], pkg.path[components[0]]
case 2:
glog.Infof("looking for %s in %s at %s (%v)", components[1], components[0], pkg.name, pkg)
if child, ok := pkg.children[components[0]]; ok {
found, ok, comments, path := child.relativelyLookupType(components[1])
return found, ok, comments, path
}
if msg, ok := pkg.types[components[0]]; ok {
found, ok, path := relativelyLookupNestedType(msg, components[1])
return found, ok, pkg.comments[components[0]], pkg.path[components[0]] + "." + path
}
glog.V(1).Infof("no such package nor message %s in %s", components[0], pkg.name)
return nil, false, Comments{}, ""
default:
glog.Fatal("not reached")
return nil, false, Comments{}, ""
}
}
func (pkg *ProtoPackage) relativelyLookupPackage(name string) (*ProtoPackage, bool) {
components := strings.Split(name, ".")
for _, c := range components {
var ok bool
pkg, ok = pkg.children[c]
if !ok {
return nil, false
}
}
return pkg, true
}