diff --git a/registry/README.md b/registry/README.md index d936af5..653ca80 100644 --- a/registry/README.md +++ b/registry/README.md @@ -14,4 +14,38 @@ To run the registry service, use: feathr-registry --http-addr 0.0.0.0:8000 ``` -Check out for more command line options with `feathr-registry --help`, detailed documents are coming soon. \ No newline at end of file +You can also use the `Dockerfile` to build a Docker image with the UI fetched from [the main Feathr repo](https://github.com/linkedin/feathr). + +## Common Settings and Environment Variables + +### Command line options + +* `--http-addr`: Listening address, defaults to `0.0.0.0:8000`. +* `--api-base`: API base URL, defaults to `/api`; the V1 and V2 API endpoints start with `/api/v1` and `/api/v2`. +* `--ext-http-addr`: Use this if you have a reverse proxy in front of the node and the node is also a member of a cluster. The value of this option will be published to the other nodes in the cluster so they can communicate with each other. Defaults to the value of `--http-addr`. +* `--load-db`: Add this option to load data from the database on start. +* `--write-db`: Add this option to write all updates to the database; use it with `--load-db` to stay fully in sync with the database. +* `--node-id`: Node id in the cluster, defaults to `1`. Each node in the same cluster must use a unique value, otherwise it will not be able to join the cluster. +* `--seeds`: Comma-separated list of seed nodes; a new node will contact the seeds to get the full picture of the whole cluster. +* `--no-init`: By default a node will try to start a new cluster if it cannot join an existing one; use this option to disable this behavior. + +### Environment variables + +* `CONNECTION_STR`: Database connection string, either in ADO connection string format (for SQL Server or Azure SQL) or as a URL (for MySQL/PostgreSQL/SQLite). +* `ENTITY_TABLE`: The name of the table that stores entities, defaults to `entities`. 
+* `EDGE_TABLE`: The name of the table that stores relationships between entities, defaults to `edges`. +* `RBAC_TABLE`: The name of the table that stores user permissions, defaults to `userroles`. +* `ENABLE_RBAC`: Set this variable to any non-empty string to enable access control; otherwise access control is disabled. + +The database schema can be created with the SQL scripts under the `scripts` directory. + +Run `feathr-registry --help` for more command line options; detailed documents are coming soon. +### Notes on clustering + +To enable registry clustering, you should: + +1. Start the initial node with `--node-id` equal to `1`. +2. Start the other nodes with unique node ids and with the `--seeds` option pointing to running nodes. Each seed can be an `IP:port` combination, e.g. `1.2.3.4:8000`, or you can use a DNS name instead of the IP address, in which case the node will try to resolve all IP addresses of this DNS name to get as many seeds as possible. +3. Only 1 node should use the `--load-db` and `--write-db` options, otherwise there could be race conditions that lead to corrupted data. Another use case is to have multiple nodes write to multiple different databases, if you need HA or geo-replication. +4. If the database-connected node goes down, you can simply restart it; all missing operations will be replicated to this node and the database should be updated. +5. If you have a reverse proxy such as nginx in front of the node, you may need to specify `--ext-http-addr`; the node will then report the value of this option as its external endpoint when joining the cluster, so other nodes can connect to it. 
\ No newline at end of file
diff --git a/registry/scripts/mssql.sql b/registry/scripts/mssql.sql
new file mode 100644
index 0000000..dbf3dda
--- /dev/null
+++ b/registry/scripts/mssql.sql
@@ -0,0 +1,56 @@
+-- Feathr registry schema for SQL Server / Azure SQL.
+-- Table names match the defaults of ENTITY_TABLE, EDGE_TABLE and RBAC_TABLE.
+
+-- Stores entities, keyed by entity_id.
+create table entities
+(
+    entity_id      varchar(50)   not null
+        primary key,
+    entity_content nvarchar(max) not null
+)
+go
+
+-- Typed relationships between entities; the composite key rejects duplicate edges.
+create table edges
+(
+    from_id   varchar(50) not null,
+    to_id     varchar(50) not null,
+    edge_type varchar(50) not null,
+    constraint edges_pk
+        primary key (from_id, to_id, edge_type)
+)
+go
+
+-- User permission records; record_id is the primary key, as in the MySQL script.
+create table userroles
+(
+    record_id     int identity,
+    project_name  varchar(100)  not null,
+    user_name     varchar(100)  not null,
+    role_name     varchar(100)  not null,
+    create_by     varchar(100)  not null,
+    create_reason nvarchar(max) not null,
+    create_time   datetime      not null,
+    delete_by     varchar(100),
+    delete_reason nvarchar(max),
+    delete_time   datetime,
+    constraint userroles_pk
+        primary key (record_id)
+)
+go
+
+create index create_by
+    on userroles (create_by);
+
+create index delete_by
+    on userroles (delete_by);
+
+create index project_name
+    on userroles (project_name);
+
+create index role_name
+    on userroles (role_name);
+
+create index user_name
+    on userroles (user_name);
+go
diff --git a/registry/scripts/mysql.sql b/registry/scripts/mysql.sql
new file mode 100644
index 0000000..2139c85
--- /dev/null
+++ b/registry/scripts/mysql.sql
@@ -0,0 +1,63 @@
+-- Feathr registry schema for MySQL.
+-- Table names match the defaults of ENTITY_TABLE, EDGE_TABLE and RBAC_TABLE.
+
+-- Stores entities, keyed by entity_id.
+create table entities
+(
+    entity_id      varchar(50) not null
+        primary key,
+    entity_content mediumtext  not null
+);
+
+-- Typed relationships between entities. The composite primary key and the
+-- varchar(50) edge_type mirror the SQL Server and SQLite scripts.
+create table edges
+(
+    from_id   varchar(50) not null,
+    to_id     varchar(50) not null,
+    edge_type varchar(50) not null,
+    constraint edges_pk
+        primary key (from_id, to_id, edge_type)
+);
+
+create index entity_dep_conn_type_index
+    on edges (edge_type);
+
+create index entity_dep_from_id_index
+    on edges (from_id);
+
+create index entity_dep_to_id_index
+    on edges (to_id);
+
+
+-- User permission records; the delete_* columns are nullable.
+create table userroles
+(
+    record_id     int auto_increment
+        primary key,
+    project_name  varchar(255) not null,
+    user_name     varchar(255) not null,
+    role_name     varchar(50)  not null,
+    create_by     varchar(255) not null,
+    create_reason text         not null,
+    create_time   datetime     not null,
+    delete_by     varchar(255) null,
+    delete_reason text         null,
+    delete_time   datetime     null
+);
+
+create index create_by
+    on userroles (create_by);
+
+create index delete_by
+    on userroles (delete_by);
+
+create index project_name
+    on userroles (project_name);
+
+create index role_name
+    on userroles (role_name);
+
+create index user_name
+    on userroles (user_name);
+
diff --git a/registry/scripts/sqlite.sql b/registry/scripts/sqlite.sql
new file mode 100644
index 0000000..7bd99fc
--- /dev/null
+++ b/registry/scripts/sqlite.sql
@@ -0,0 +1,20 @@
+-- Feathr registry schema for SQLite.
+-- Columns are declared NOT NULL explicitly: SQLite accepts NULL in a
+-- non-INTEGER PRIMARY KEY column unless the column says otherwise.
+-- NOTE(review): unlike the SQL Server and MySQL scripts, no userroles table is
+-- created here -- confirm whether access control is expected to work on SQLite.
+
+-- Stores entities, keyed by entity_id.
+CREATE TABLE entities(
+    entity_id varchar(50) NOT NULL,
+    entity_content text NOT NULL,
+    PRIMARY KEY (entity_id)
+);
+
+-- Typed relationships between entities; the composite key rejects duplicate edges.
+CREATE TABLE edges(
+    from_id varchar(50) NOT NULL,
+    to_id varchar(50) NOT NULL,
+    edge_type varchar(50) NOT NULL,
+    PRIMARY KEY (from_id, to_id, edge_type)
+);