Put the entire current DB schema into a single migration file.

Remove all the other migrations.

Fixes b/150140899.

Change-Id: I9697c11d1dd8b20dde6fadbdf7be2738583c9f9b
Reviewed-on: https://team-review.git.corp.google.com/c/golang/discovery/+/694062
Reviewed-by: Julie Qiu <julieqiu@google.com>
This commit is contained in:
Jonathan Amsterdam 2020-03-18 09:54:22 -04:00 коммит произвёл Julie Qiu
Родитель c5d548b120
Коммит 0001a2fda8
64 изменённых файлов: 288 добавлений и 1165 удалений

Просмотреть файл

@ -3,19 +3,24 @@
-- license that can be found in the LICENSE file.
DROP TABLE
versions,
modules,
packages,
imports,
imports_unique,
licenses,
excluded_prefixes,
module_version_states,
search_documents;
search_documents,
alternative_module_paths,
experiments,
package_version_states,
version_map;
DROP FUNCTION
hll_hash,
hll_zeros,
popular_search,
popular_search_go_mod,
trigger_modify_updated_at,
trigger_modify_packages_tsv_parent_directories,
trigger_modify_search_documents_tsv_parent_directories,

Просмотреть файл

@ -2,8 +2,8 @@
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
--
-- This schema migration is created from squashing all of our existing
-- migration files in commit 8670fb4511816244b7bb4e33912ce2cd3a1527ce.
-- This schema migration was created by dumping the DB schema
-- as of commit bc820754c5d2bce5c3cdb66515656afb5885a440.
SET statement_timeout = 0;
SET lock_timeout = 0;
@ -27,24 +27,34 @@ COMMENT ON FUNCTION trigger_modify_updated_at IS
'FUNCTION trigger_modify_updated_at sets the value of a column named updated_at to the current timestamp. This is used by the versions, packages, and search_documents tables as a trigger to set the value of updated_at.';
CREATE FUNCTION to_tsvector_parent_directories(package_path text, module_path text) RETURNS tsvector
LANGUAGE plpgsql
LANGUAGE plpgsql PARALLEL SAFE
AS $$
DECLARE
current_directory TEXT;
parent_directories TEXT;
sub_path TEXT;
parent_directories TEXT := module_path;
sub_directories TEXT[][];
current_directory TEXT := module_path;
tsv_parent_directories TSVECTOR := module_path::tsvector;
BEGIN
IF package_path = module_path THEN
RETURN tsv_parent_directories;
RETURN module_path::tsvector;
END IF;
-- +2 because substr is one-based and we need to include the trailing slash
sub_path := substr(package_path, length(module_path) + 2);
IF module_path = 'std' THEN
sub_path := package_path;
ELSE
sub_path := substr(package_path, length(module_path) + 2);
current_directory := module_path;
parent_directories := module_path;
END IF;
sub_directories := regexp_split_to_array(sub_path, '/');
FOR i IN 1..cardinality(sub_directories) LOOP
current_directory := current_directory || '/' || sub_directories[i];
parent_directories = parent_directories || ' ' || current_directory;
IF current_directory IS NULL THEN
current_directory := sub_directories[i];
ELSE
current_directory := COALESCE(current_directory, '') || '/' || sub_directories[i];
END IF;
parent_directories = COALESCE(parent_directories, '') || ' ' || current_directory;
END LOOP;
RETURN parent_directories::tsvector;
END;
@ -60,7 +70,7 @@ CREATE TYPE version_type AS ENUM (
COMMENT ON TYPE version_type IS
'ENUM version_type specifies the version types expected for a given module version.';
CREATE TABLE versions (
CREATE TABLE modules (
module_path text NOT NULL,
version text NOT NULL,
commit_time timestamp with time zone NOT NULL,
@ -69,36 +79,39 @@ CREATE TABLE versions (
readme_file_path text,
readme_contents text,
source_info jsonb,
major integer NOT NULL,
minor integer NOT NULL,
patch integer NOT NULL,
prerelease text NOT NULL,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
sort_version text NOT NULL,
redistributable boolean NOT NULL,
has_go_mod boolean,
PRIMARY KEY (module_path, version)
);
COMMENT ON TABLE versions IS
'TABLE versions contains modules at a specific semantic version.';
COMMENT ON COLUMN versions.prerelease IS
'prerelease will be set as "~" if the version does not have a prerelease tag.';
COMMENT ON TABLE modules IS
'TABLE modules contains modules at a specific semantic version.';
COMMENT ON COLUMN modules.sort_version IS
'COLUMN sort_version holds the version in a form suitable for use in ORDER BY.';
COMMENT ON COLUMN modules.redistributable IS
'COLUMN redistributable says whether the module is redistributable.';
COMMENT ON COLUMN modules.has_go_mod IS
'COLUMN has_go_mod records whether the module zip contains a go.mod file.';
CREATE INDEX idx_versions_semver_sort ON versions
USING btree (module_path, major DESC, minor DESC, patch DESC, prerelease DESC);
COMMENT ON INDEX idx_versions_semver_sort IS
-- Index on modules ordered by sort_version and version_type, both descending.
-- The stored comment names this index rather than the old idx_versions_semver_sort.
CREATE INDEX idx_modules_sort_version ON modules USING btree (sort_version DESC, version_type DESC);
COMMENT ON INDEX idx_modules_sort_version IS
'INDEX idx_modules_sort_version is used to sort versions in order of descending latest. It is used to get the latest version of a package/module and to fetch all versions of a package/module in semver order.';
CREATE INDEX idx_versions_module_path_text_pattern_ops ON versions
USING btree (module_path text_pattern_ops);
COMMENT ON INDEX idx_versions_module_path_text_pattern_ops IS
'INDEX idx_versions_module_path_text_pattern_ops is using to improve performance of LIKE statements for module_path. It is used to fetch directories matching a given module_path prefix.';
CREATE INDEX idx_versions_version_type ON versions USING btree (version_type);
COMMENT ON INDEX idx_versions_version_type IS
-- Index on modules.module_path using the text_pattern_ops operator class.
-- The stored comment names this index rather than the old idx_versions_* index.
CREATE INDEX idx_modules_module_path_text_pattern_ops ON modules
USING btree (module_path text_pattern_ops);
COMMENT ON INDEX idx_modules_module_path_text_pattern_ops IS
'INDEX idx_modules_module_path_text_pattern_ops is used to improve performance of LIKE statements for module_path. It is used to fetch directories matching a given module_path prefix.';
-- Index on modules.version_type.
-- The stored comment names this index rather than the old idx_versions_version_type.
CREATE INDEX idx_modules_version_type ON modules USING btree (version_type);
COMMENT ON INDEX idx_modules_version_type IS
'INDEX idx_modules_version_type is used when fetching versions for a given version_type.';
CREATE TRIGGER set_updated_at BEFORE INSERT OR UPDATE ON versions
FOR EACH ROW EXECUTE PROCEDURE trigger_modify_updated_at();
COMMENT ON TRIGGER set_updated_at ON versions IS
CREATE TRIGGER set_updated_at BEFORE INSERT OR UPDATE ON modules
FOR EACH ROW EXECUTE PROCEDURE trigger_modify_updated_at();
COMMENT ON TRIGGER set_updated_at ON modules IS
'TRIGGER set_updated_at updates the value of the updated_at column to the current timestamp whenever a row is inserted or updated to the table.';
CREATE TABLE packages (
@ -116,10 +129,10 @@ CREATE TABLE packages (
redistributable boolean DEFAULT false NOT NULL,
documentation text,
tsv_parent_directories tsvector,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
PRIMARY KEY (path, module_path, version),
FOREIGN KEY (module_path, version) REFERENCES versions(module_path, version) ON DELETE CASCADE
FOREIGN KEY (module_path, version) REFERENCES modules(module_path, version) ON DELETE CASCADE
);
COMMENT ON TABLE packages IS
'TABLE packages contains packages in a specific module version.';
@ -133,12 +146,10 @@ COMMENT ON INDEX idx_packages_v1_path IS
'INDEX idx_packages_v1_path is used to get all of the packages in a series.';
CREATE INDEX idx_packages_module_path_text_pattern_ops ON packages USING btree (module_path text_pattern_ops);
COMMENT ON INDEX idx_versions_module_path_text_pattern_ops IS
'INDEX idx_versions_module_path_text_pattern_ops is using to improve performance of LIKE statements for module_path. It is used to fetch directories matching a given module_path prefix.';
COMMENT ON INDEX idx_packages_module_path_text_pattern_ops IS
'INDEX idx_packages_module_path_text_pattern_ops is used to improve performance of LIKE statements for module_path. It is used to fetch directories matching a given module_path prefix.';
CREATE INDEX idx_packages_path_text_pattern_ops ON packages USING btree (path text_pattern_ops);
COMMENT ON INDEX idx_versions_module_path_text_pattern_ops IS
'INDEX idx_versions_module_path_text_pattern_ops is used to improve performance of LIKE statements for module_path. It is used to fetch directories matching a given module_path prefix.';
CREATE INDEX idx_packages_tsv_parent_directories ON packages USING gin (tsv_parent_directories);
COMMENT ON INDEX idx_packages_tsv_parent_directories IS
@ -169,13 +180,17 @@ CREATE TABLE imports (
from_module_path text NOT NULL,
from_version text NOT NULL,
to_path text NOT NULL,
PRIMARY KEY (to_path, from_path, from_version),
PRIMARY KEY (to_path, from_path, from_version, from_module_path),
FOREIGN KEY (from_path, from_module_path, from_version)
REFERENCES packages(path, module_path, version) ON DELETE CASCADE
);
COMMENT ON TABLE imports IS
'TABLE imports contains the imports for a package in the packages table. Package (from_path), in module (from_module_path) at version (from_version), imports package (to_path). We do not store the version and module at which to_path is imported because it is hard to compute.';
CREATE INDEX idx_imports_from_path_from_version ON imports USING btree (from_path, from_version);
COMMENT ON INDEX idx_imports_from_path_from_version IS
'INDEX idx_imports_from_path_from_version is used to improve performance of the imports tab.';
CREATE TABLE imports_unique (
to_path text NOT NULL,
from_path text NOT NULL,
@ -191,17 +206,23 @@ CREATE TABLE licenses (
file_path text NOT NULL,
contents text NOT NULL,
types text[],
coverage jsonb,
PRIMARY KEY (module_path, version, file_path),
FOREIGN KEY (module_path, version) REFERENCES versions(module_path, version) ON DELETE CASCADE
FOREIGN KEY (module_path, version) REFERENCES modules(module_path, version) ON DELETE CASCADE
);
COMMENT ON TABLE licenses IS
'TABLE licenses contains the license data for a given module version.';
-- Describes the licenses.coverage column; the package name is spelled
-- consistently as "licensecheck" in both places.
COMMENT ON COLUMN licenses.coverage IS
'COLUMN coverage contains the JSON-serialized contents of the licensecheck.Coverage value returned from calling licensecheck.Cover.';
CREATE TABLE excluded_prefixes (
prefix text NOT NULL,
created_by text NOT NULL,
reason text NOT NULL,
created_at timestamp without time zone DEFAULT NOW(),
created_at timestamp with time zone DEFAULT now(),
CONSTRAINT excluded_prefixes_created_by_check CHECK ((created_by <> ''::text)),
CONSTRAINT excluded_prefixes_prefix_check CHECK ((prefix <> ''::text)),
CONSTRAINT excluded_prefixes_reason_check CHECK ((reason <> ''::text)),
PRIMARY KEY (prefix)
);
COMMENT ON TABLE excluded_prefixes IS
@ -210,18 +231,24 @@ COMMENT ON TABLE excluded_prefixes IS
CREATE TABLE module_version_states (
module_path text NOT NULL,
version text NOT NULL,
status integer,
error text,
status integer DEFAULT 0 NOT NULL,
error text DEFAULT ''::text NOT NULL,
try_count integer DEFAULT 0 NOT NULL,
last_processed_at timestamp with time zone,
next_processed_after timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
index_timestamp timestamp with time zone NOT NULL,
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
app_version text DEFAULT ''::text NOT NULL,
sort_version text NOT NULL,
go_mod_path text DEFAULT ''::text NOT NULL,
PRIMARY KEY (module_path, version)
);
COMMENT ON TABLE module_version_states IS
'TABLE module_version_states is used by the ETL to record the state of every module we have seen from the proxy index.';
COMMENT ON COLUMN module_version_states.sort_version IS
'COLUMN sort_version holds the version in a form suitable for use in ORDER BY. The string format is described in internal/version.ForSorting.';
COMMENT ON COLUMN module_version_states.go_mod_path IS
'COLUMN go_mod_path holds the module path from the go.mod file.';
CREATE INDEX idx_module_version_states_index_timestamp ON module_version_states USING btree (index_timestamp DESC);
COMMENT ON INDEX idx_module_version_states_index_timestamp IS
@ -229,11 +256,15 @@ COMMENT ON INDEX idx_module_version_states_index_timestamp IS
CREATE INDEX idx_module_version_states_last_processed_at ON module_version_states USING btree (last_processed_at);
COMMENT ON INDEX idx_module_version_states_last_processed_at IS
'INDEX idx_module_version_states_last_processed_at is used to get the last time a given module version was attempted to be processed.';
'INDEX idx_module_version_states_last_processed_at is used to get the next time at which a module version should be retried for processing.';
CREATE INDEX idx_module_version_states_next_processed_after ON module_version_states USING btree (next_processed_after);
COMMENT ON INDEX idx_module_version_states_last_processed_at IS
'INDEX idx_module_version_states_last_processed_at is used to get the next time at which a module version should be retried for processing.';
COMMENT ON INDEX idx_module_version_states_next_processed_after IS
'INDEX idx_module_version_states_next_processed_after is used to get the next time at which a module version should be retried for processing.';
CREATE INDEX idx_module_version_states_sort_version ON module_version_states USING btree (sort_version DESC);
COMMENT ON INDEX idx_module_version_states_sort_version IS
'INDEX idx_module_version_states_sort_version is used to sort by version, to determine when a module version should be retried for processing.';
CREATE TABLE search_documents (
package_path text NOT NULL,
@ -249,10 +280,11 @@ CREATE TABLE search_documents (
hll_leading_zeros integer,
tsv_parent_directories tsvector,
tsv_search_tokens tsvector NOT NULL,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
version_updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
imported_by_count_updated_at timestamp without time zone,
created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
version_updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
imported_by_count_updated_at timestamp with time zone,
has_go_mod boolean,
PRIMARY KEY (package_path),
FOREIGN KEY (package_path, module_path, version)
REFERENCES packages(path, module_path, version) ON DELETE CASCADE
@ -263,11 +295,8 @@ COMMENT ON COLUMN search_documents.hll_register IS
'hll_* columns are added to help implement cardinality estimation using the hyperloglog algorithm. hll_register is the randomized bucket for this record.';
COMMENT ON COLUMN search_documents.hll_leading_zeros IS
'hll_* columns are added to help implement cardinality estimation using the hyperloglog algorithm. hll_leading_zeros is the number of leading zeros in the binary representation of hll_hash(package_path).';
-- INDEX idx_imported_by_count_gt_50 and idx_imported_by_count_gt_8 will be removed
-- and replaced by idx_imported_by_count_desc after testing to compare various search methods.
CREATE INDEX idx_imported_by_count_gt_8 ON search_documents USING btree (package_path) WHERE (imported_by_count > 8);
CREATE INDEX idx_imported_by_count_gt_50 ON search_documents USING btree (package_path) WHERE (imported_by_count > 50);
COMMENT ON COLUMN search_documents.has_go_mod IS
'COLUMN has_go_mod records whether the module zip contains a go.mod file.';
CREATE INDEX idx_imported_by_count_desc ON search_documents USING btree (imported_by_count DESC);
COMMENT ON INDEX idx_imported_by_count_desc IS
@ -320,7 +349,7 @@ COMMENT ON TRIGGER set_tsv_parent_directories ON search_documents IS
'TRIGGER set_tsv_parent_directories sets the value of tsv_parent_directories to the output of FUNCTION trigger_modify_search_documents_tsv_parent_directories when a new row in inserted.';
CREATE FUNCTION hll_hash(text) RETURNS bigint
LANGUAGE sql
LANGUAGE sql PARALLEL SAFE
AS $_$
-- This is somewhat a hack, since there is no from_hex function in postgres.
-- Take the first 64 bits of the md5 hash by converting the hexadecimal
@ -331,7 +360,7 @@ COMMENT ON FUNCTION hll_hash IS
'FUNCTION hll_hash is a 64-bit integral hash function, which is used in implementing the hyperloglog cardinality estimation algorithm.';
CREATE FUNCTION hll_zeros(bigint) RETURNS integer
LANGUAGE plpgsql
LANGUAGE plpgsql PARALLEL SAFE
AS $_$
BEGIN
IF $1 < 0 THEN
@ -346,7 +375,7 @@ BEGIN
END LOOP;
RETURN 1;
END; $_$;
COMMENT ON FUNCTION hll_zeros IS
COMMENT ON FUNCTION hll_zeros(bigint) IS
'FUNCTION hll_zeros returns the number of leading zeros in the binary representation of the given bigint.';
CREATE TYPE search_result AS (
@ -432,11 +461,198 @@ BEGIN
RETURN QUERY SELECT * FROM UNNEST(top[off+1:last_idx])
WHERE package_path IS NOT NULL AND score > 0.1;
END; $$;
COMMENT ON FUNCTION popular_search IS
COMMENT ON FUNCTION popular_search(rawquery text, lim integer, off integer) IS
'FUNCTION popular_search is used to generate results for search. It is implemented as a stored function, so that we can use a cursor to scan search documents procedurally, and stop scanning early, whenever our search results are provably correct.';
CREATE TEXT SEARCH CONFIGURATION golang (COPY = pg_catalog.english);
ALTER TEXT SEARCH CONFIGURATION golang DROP MAPPING FOR url_path;
ALTER TEXT SEARCH CONFIGURATION golang ALTER MAPPING FOR asciiword WITH simple,english_stem;
CREATE TEXT SEARCH CONFIGURATION golang (
PARSER = pg_catalog."default" );
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR asciiword WITH simple, english_stem;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR word WITH english_stem;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR numword WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR email WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR url WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR host WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR sfloat WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR version WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR hword_numpart WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR hword_part WITH english_stem;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR hword_asciipart WITH english_stem;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR numhword WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR asciihword WITH english_stem;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR hword WITH english_stem;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR file WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR "float" WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR "int" WITH simple;
ALTER TEXT SEARCH CONFIGURATION golang
ADD MAPPING FOR uint WITH simple;
-- Describes the custom "golang" text search configuration and why its token
-- mappings differ from the stock English configuration.
COMMENT ON TEXT SEARCH CONFIGURATION golang IS
'TEXT SEARCH CONFIGURATION golang is a custom search configuration used when creating tsvector for search. The url_path token type is removed, so that "github.com/foo/bar@v1.2.3" is indexed only as the full URL string, and not also as "/foo/bar@v1.2.3". The asciiword token type is set to a "simple,english_stem" mapping, so that "plural" words will be indexed without stemming. This idea came from the "Morphological and Exact Search" section here: https://asp437.github.io/posts/flexible-fts.html.';
-- popular_search_go_mod returns up to lim search results for rawquery,
-- skipping the first off results.
--
-- Parameters:
--   rawquery      - search text, converted with websearch_to_tsquery.
--   lim           - maximum number of rows to return.
--   off           - number of leading (best-ranked) rows to skip.
--   redist_factor - score multiplier applied to non-redistributable rows.
--   go_mod_factor - score multiplier applied to rows whose has_go_mod is false.
--
-- Returns rows of type search_result whose score is greater than 0.1.
CREATE FUNCTION popular_search_go_mod(rawquery text, lim integer, off integer, redist_factor real, go_mod_factor real) RETURNS SETOF search_result
LANGUAGE plpgsql
AS $$
-- The cursor walks search_documents from most-imported to least-imported and
-- computes a score for each row against the supplied query.
DECLARE cur CURSOR(query TSQUERY) FOR
SELECT
package_path,
module_path,
version,
commit_time,
imported_by_count,
(
-- score = text rank * ln(e + imported_by_count) * penalty factors; the last
-- factor zeroes the score when the document does not match the query.
ts_rank(tsv_search_tokens, query) *
ln(exp(1)+imported_by_count) *
CASE WHEN redistributable THEN 1 ELSE redist_factor END *
-- A NULL has_go_mod is treated the same as true (no penalty).
CASE WHEN COALESCE(has_go_mod, true) THEN 1 ELSE go_mod_factor END *
CASE WHEN tsv_search_tokens @@ query THEN 1 ELSE 0 END
) score
FROM search_documents
ORDER BY imported_by_count DESC;
-- top holds the best lim+off results seen so far, best first.
top search_result[];
-- res is the row most recently fetched from the cursor.
res search_result;
-- last_idx is the index of the final slot in top.
last_idx INT;
BEGIN
last_idx := lim+off;
-- Start with an array of last_idx NULL slots.
top := array_fill(NULL::search_result, array[last_idx]);
OPEN cur(query := websearch_to_tsquery(rawquery));
FETCH cur INTO res;
WHILE found LOOP
-- Only attempt an insert if the last slot is still empty or this row scores
-- at least as high as the row currently in the last slot.
IF top[last_idx] IS NULL OR res.score >= top[last_idx].score THEN
FOR i IN 1..last_idx LOOP
-- Find the first slot this row should precede. Ordering is by higher score,
-- then later commit_time, then smaller package_path.
IF top[i] IS NULL OR
(res.score > top[i].score) OR
(res.score = top[i].score AND res.commit_time > top[i].commit_time) OR
(res.score = top[i].score AND res.commit_time = top[i].commit_time AND
res.package_path < top[i].package_path) THEN
-- Insert res at position i; the slice bounds drop the previous last element
-- so the array keeps last_idx entries.
top := (top[1:i-1] || res) || top[i:last_idx-1];
EXIT;
END IF;
END LOOP;
END IF;
-- Early exit: stop scanning once the score in the last slot exceeds
-- ln(e + imported_by_count) of the current row.
-- NOTE(review): presumably this is an upper bound on the score of any later
-- row, given the descending imported_by_count order and assuming the other
-- score factors are at most 1 -- confirm for redist_factor/go_mod_factor.
IF top[last_idx].score > ln(exp(1)+res.imported_by_count) THEN
EXIT;
END IF;
FETCH cur INTO res;
END LOOP;
CLOSE cur;
-- Apply the offset, then drop unfilled slots and low-scoring rows.
RETURN QUERY SELECT * FROM UNNEST(top[off+1:last_idx])
WHERE package_path IS NOT NULL AND score > 0.1;
END; $$;
-- Describes popular_search_go_mod relative to popular_search.
COMMENT ON FUNCTION popular_search_go_mod(rawquery text, lim integer, off integer, redist_factor real, go_mod_factor real) IS
'FUNCTION popular_search_go_mod is identical to popular_search except for the additional multiplier for the has_go_mod field.';
SET default_tablespace = '';
SET default_with_oids = false;
-- Pairs an alternative module path with its canonical module path.
-- Each (alternative, canonical) pair may appear only once.
CREATE TABLE alternative_module_paths (
    alternative text        NOT NULL,
    canonical   text        NOT NULL,
    created_at  timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (alternative, canonical)
);
COMMENT ON TABLE alternative_module_paths IS
'TABLE alternative_module_paths contains module_paths that are known to have (1) a vanity import path, such as github.com/rsc/quote vs rsc.io/quote (2) a mismatch between the module path in the go.mod and repository, such as in the case of forks, or (3) a case insensitive spelling, such as in the case of github.com/sirupsen/logrus vs github.com/Sirupsen/logrus. It is used to filter out modules with the alternative path from the discovery site dataset.';
COMMENT ON COLUMN alternative_module_paths.alternative IS
'COLUMN alternative contains the path prefix of packages that should be filtered out from the discovery site search results. For example, github.com/google/go-cloud is the alternative prefix for all packages in the modules gocloud.dev and github.com/google/go-cloud.';
COMMENT ON COLUMN alternative_module_paths.canonical IS
'COLUMN canonical contains the module path that can be found in the go.mod file of a package. For example, gocloud.dev is the canonical prefix for all packages in gocloud.dev and github.com/google/go-cloud.';
-- One row per experiment, keyed by name.
-- rollout is constrained to the inclusive range 0..100 and defaults to 0.
CREATE TABLE experiments (
    name        text    NOT NULL,
    rollout     integer NOT NULL DEFAULT 0,
    description text    NOT NULL,
    PRIMARY KEY (name),
    CONSTRAINT experiments_rollout_check CHECK (rollout >= 0 AND rollout <= 100)
);
COMMENT ON TABLE experiments IS
'TABLE experiments contains data for running experiments.';
COMMENT ON COLUMN experiments.name IS
'COLUMN name is the name of the experiment.';
COMMENT ON COLUMN experiments.rollout IS
'COLUMN rollout is the percentage of total requests that are included for the experiment.';
COMMENT ON COLUMN experiments.description IS
'COLUMN description describes the experiment.';
-- Per-package processing state, keyed by (package_path, module_path, version).
-- Rows are removed automatically when the referenced module_version_states
-- row is deleted.
CREATE TABLE package_version_states (
    package_path text        NOT NULL,
    module_path  text        NOT NULL,
    version      text        NOT NULL,
    status       integer     NOT NULL,
    error        text,
    created_at   timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at   timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (package_path, module_path, version),
    FOREIGN KEY (module_path, version)
        REFERENCES module_version_states (module_path, version)
        ON DELETE CASCADE
);
COMMENT ON TABLE package_version_states IS
'TABLE package_version_states is used to record the state of every package we have seen from the proxy.';
CREATE TRIGGER set_updated_at BEFORE INSERT OR UPDATE ON package_version_states
FOR EACH ROW EXECUTE PROCEDURE trigger_modify_updated_at();
COMMENT ON TRIGGER set_updated_at ON package_version_states IS
'TRIGGER set_updated_at updates the value of the updated_at column to the current timestamp whenever a row is inserted or updated to the table.';
-- Maps a (module_path, requested_version) pair to the version it resolved to,
-- together with the status and error recorded for that lookup.
-- resolved_version, error and sort_version are nullable.
CREATE TABLE version_map (
    module_path       text        NOT NULL,
    requested_version text        NOT NULL,
    resolved_version  text,
    status            integer     NOT NULL,
    error             text,
    created_at        timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at        timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP,
    sort_version      text,
    PRIMARY KEY (module_path, requested_version)
);
COMMENT ON TABLE version_map IS
'TABLE version_map contains data about a user-requested path and the semantic version that it resolves to. It is used to support fetching frontend detail pages using module queries.';
COMMENT ON COLUMN version_map.requested_version IS
'COLUMN requested_version is the version that was requested by a user from the frontend. It may or may not resolve to a semantic version.';
COMMENT ON COLUMN version_map.resolved_version IS
'COLUMN resolved_version is the semantic version that a requested_version resolves to.';
COMMENT ON COLUMN version_map.status IS
'COLUMN status is the status returned by the ETL when fetching the module version.';
-- Describes the version_map.error column; the text names the error column,
-- not status.
COMMENT ON COLUMN version_map.error IS
'COLUMN error is the error that occurred when fetching the module version, in cases when status != 200.';
CREATE TRIGGER set_updated_at BEFORE INSERT OR UPDATE ON version_map
FOR EACH ROW EXECUTE PROCEDURE trigger_modify_updated_at();

Просмотреть файл

@ -1,31 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
-- to_tsvector_parent_directories builds a tsvector containing module_path and
-- every directory prefix between module_path and package_path (inclusive).
-- For example, package_path 'a.com/m/x/y' with module_path 'a.com/m' yields
-- the lexemes 'a.com/m', 'a.com/m/x' and 'a.com/m/x/y'.
-- When package_path equals module_path, only module_path is returned.
-- NOTE(review): assumes package_path starts with module_path followed by '/';
-- this is not checked here.
CREATE OR REPLACE FUNCTION to_tsvector_parent_directories(package_path text, module_path text) RETURNS tsvector
LANGUAGE plpgsql
AS $$
DECLARE
-- Portion of package_path that follows module_path and its separator.
sub_path TEXT;
-- Space-separated list of directories accumulated so far.
parent_directories TEXT := module_path;
-- Path components of sub_path.
sub_directories TEXT[][];
-- Directory path built up one component at a time.
current_directory TEXT := module_path;
tsv_parent_directories TSVECTOR := module_path::tsvector;
BEGIN
IF package_path = module_path THEN
RETURN tsv_parent_directories;
END IF;
-- +2: substr is one-based (+1), and the '/' that follows module_path is
-- skipped (+1), so sub_path starts at the first character after that slash.
sub_path := substr(package_path, length(module_path) + 2);
sub_directories := regexp_split_to_array(sub_path, '/');
FOR i IN 1..cardinality(sub_directories) LOOP
-- Extend the current directory by one component and append it to the list.
current_directory := current_directory || '/' || sub_directories[i];
parent_directories = parent_directories || ' ' || current_directory;
END LOOP;
RETURN parent_directories::tsvector;
END;
$$;
END;

Просмотреть файл

@ -1,42 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
-- to_tsvector_parent_directories builds a tsvector of directory prefixes for
-- package_path.
--
-- For a module_path other than 'std', the result contains module_path and
-- every longer prefix up to package_path; e.g. package_path 'a.com/m/x/y'
-- with module_path 'a.com/m' yields 'a.com/m', 'a.com/m/x' and 'a.com/m/x/y'.
-- For module_path 'std', the prefixes are built from package_path alone and
-- 'std' itself is not included; e.g. 'net/http' yields 'net' and 'net/http'.
-- When package_path equals module_path, only module_path is returned.
-- NOTE(review): for non-std modules this assumes package_path starts with
-- module_path followed by '/'; this is not checked here.
CREATE OR REPLACE FUNCTION to_tsvector_parent_directories(package_path text, module_path text) RETURNS tsvector
LANGUAGE plpgsql
AS $$
DECLARE
-- Directory path built up one component at a time; stays NULL for 'std'
-- until the first component is processed.
current_directory TEXT;
-- Space-separated list of directories accumulated so far.
parent_directories TEXT;
-- Portion of package_path that is split into components.
sub_path TEXT;
-- Path components of sub_path.
sub_directories TEXT[][];
BEGIN
IF package_path = module_path THEN
RETURN module_path::tsvector;
END IF;
-- For 'std' the whole package_path is split and nothing is prepended.
IF module_path = 'std' THEN
sub_path := package_path;
ELSE
-- +2: substr is one-based (+1), and the '/' that follows module_path is
-- skipped (+1), so sub_path starts at the first character after that slash.
sub_path := substr(package_path, length(module_path) + 2);
current_directory := module_path;
parent_directories := module_path;
END IF;
sub_directories := regexp_split_to_array(sub_path, '/');
FOR i IN 1..cardinality(sub_directories) LOOP
-- With no prefix yet (the 'std' case, first iteration), start the path at
-- the component itself; otherwise extend the path by one component.
IF current_directory IS NULL THEN
current_directory := sub_directories[i];
ELSE
current_directory := COALESCE(current_directory, '') || '/' || sub_directories[i];
END IF;
-- COALESCE keeps the concatenation non-NULL while the list is still unset.
parent_directories = COALESCE(parent_directories, '') || ' ' || current_directory;
END LOOP;
RETURN parent_directories::tsvector;
END;
$$ PARALLEL SAFE;
END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
DROP INDEX idx_imports_from_path_from_version;
END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
CREATE INDEX idx_imports_from_path_from_version ON imports USING btree (from_path, from_version);
COMMENT ON INDEX idx_imports_from_path_from_version IS
'INDEX idx_imports_from_path_from_version is used to improve performance of the imports tab.';
END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
ALTER FUNCTION hll_hash PARALLEL UNSAFE;
ALTER FUNCTION hll_zeros PARALLEL UNSAFE;
ALTER FUNCTION to_tsvector_parent_directories PARALLEL UNSAFE;
END;

Просмотреть файл

@ -1,15 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
-- These functions are set as PARALLEL SAFE, since they do not modify any
-- database state and are safe to be run in parallel.
-- https://www.postgresql.org/docs/11/parallel-safety.html
-- https://www.postgresql.org/docs/11/sql-createfunction.html (see PARALLEL section)
ALTER FUNCTION hll_hash PARALLEL SAFE;
ALTER FUNCTION hll_zeros PARALLEL SAFE;
ALTER FUNCTION to_tsvector_parent_directories PARALLEL SAFE;
END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
DROP TABLE vanity_prefixes;
END;

Просмотреть файл

@ -1,21 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
CREATE TABLE vanity_prefixes (
canonical TEXT NOT NULL,
alternative TEXT NOT NULL,
created_at TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL,
PRIMARY KEY (canonical),
UNIQUE(alternative)
);
-- Describes the vanity_prefixes table.
COMMENT ON TABLE vanity_prefixes IS
'TABLE vanity_prefixes contains path prefixes that are known to be hosted under an alias name. (For example gocloud.dev can also be fetched from the module proxy as github.com/google/go-cloud.) It is used to filter out packages whose import paths begin with the alternative prefix from search results.';
COMMENT ON COLUMN vanity_prefixes.canonical IS
'COLUMN canonical contains the path prefix that can be found in the go.mod file of a package. For example, gocloud.dev is the canonical prefix for all packages in the modules gocloud.dev and github.com/google/go-cloud.';
COMMENT ON COLUMN vanity_prefixes.alternative IS
'COLUMN alternative contains the path prefix of packages that should be filtered out from the discovery site search results. For example, github.com/google/go-cloud is the alternative prefix for all packages in the modules gocloud.dev and github.com/google/go-cloud.';
END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
ALTER TABLE excluded_prefixes DROP CONSTRAINT excluded_prefixes_prefix_check;
ALTER TABLE excluded_prefixes DROP CONSTRAINT excluded_prefixes_created_by_check;
ALTER TABLE excluded_prefixes DROP CONSTRAINT excluded_prefixes_reason_check;
END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;
ALTER TABLE excluded_prefixes ADD CHECK (prefix <> '');
ALTER TABLE excluded_prefixes ADD CHECK (reason <> '');
ALTER TABLE excluded_prefixes ADD CHECK (created_by <> '');
END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the coverage column from licenses.
ALTER TABLE licenses
    DROP COLUMN coverage;

END;

Просмотреть файл

@ -1,12 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Add a nullable jsonb column holding serialized license coverage data.
ALTER TABLE licenses
    ADD COLUMN coverage jsonb;

COMMENT ON COLUMN licenses.coverage IS
'COLUMN coverage contains the JSON-serialized contents of the licensecheck.Coverage value returned from calling licensecheck.Cover.';

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove sort_version from both tables that carry it.
ALTER TABLE module_version_states
    DROP COLUMN sort_version;

ALTER TABLE versions
    DROP COLUMN sort_version;

END;

Просмотреть файл

@ -1,29 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- module_version_states: sortable form of the version plus a descending index.
ALTER TABLE module_version_states
    ADD COLUMN sort_version text;
COMMENT ON COLUMN module_version_states.sort_version IS
'COLUMN sort_version holds the version in a form suitable for use in ORDER BY. The string format is described in internal/version.ForSorting.';

CREATE INDEX idx_module_version_states_sort_version
    ON module_version_states (sort_version DESC);
COMMENT ON INDEX idx_module_version_states_sort_version IS
'INDEX idx_module_version_states_sort_version is used to sort by version, to determine when a module version should be retried for processing.';

-- versions: same column, indexed together with version_type.
ALTER TABLE versions
    ADD COLUMN sort_version text;
COMMENT ON COLUMN versions.sort_version IS
'COLUMN sort_version holds the version in a form suitable for use in ORDER BY.';

CREATE INDEX idx_versions_sort_version
    ON versions (sort_version DESC, version_type DESC);
-- The comment text names the index it is attached to (it previously referred
-- to a differently named index, idx_versions_semver_sort).
COMMENT ON INDEX idx_versions_sort_version IS
'INDEX idx_versions_sort_version is used to sort versions in order of descending latest. It is used to get the latest version of a package/module and to fetch all versions of a package/module in semver order.';

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- The view has to be dropped first: its definition calls all_redistributable,
-- and DROP FUNCTION (RESTRICT by default) refuses to remove a function that
-- another object still depends on.
DROP VIEW vw_module_licenses;
DROP FUNCTION all_redistributable;

END;

Просмотреть файл

@ -1,75 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- all_redistributable returns true when every element of types is contained
-- in the list below. A NULL argument yields false (via COALESCE).
CREATE OR REPLACE FUNCTION all_redistributable(types text[]) RETURNS boolean AS $$
    SELECT COALESCE(types <@ ARRAY['AGPL-3.0', 'Apache-2.0', 'Artistic-2.0', 'BSD-2-Clause',
                                   'BSD-3-Clause', 'BSL-1.0', 'GPL2', 'GPL3', 'ISC', 'LGPL-2.1',
                                   'LGPL-3.0', 'MIT', 'MPL-2.0', 'Zlib'], false);
$$ LANGUAGE SQL;

COMMENT ON FUNCTION all_redistributable IS
'FUNCTION all_redistributable reports whether all types in the argument are redistributable license types.';

CREATE OR REPLACE VIEW vw_module_licenses AS
WITH top_modules AS (
    -- Get the most popular modules from search_documents.
    SELECT
        module_path,
        module_imported_by_count,
        redistributable,
        rank() OVER (ORDER BY module_imported_by_count DESC) AS rank
    FROM (
        SELECT
            module_path,
            max(imported_by_count) AS module_imported_by_count,
            redistributable
        FROM search_documents
        WHERE module_path != 'std'
        GROUP BY module_path, redistributable
        ORDER BY max(imported_by_count) DESC
    ) a
    WHERE module_imported_by_count > 10
), max_sort_versions AS (
    -- Find the max sort_version of each of those modules.
    SELECT
        v.module_path,
        MAX(v.sort_version) AS sort_version,
        t.module_imported_by_count,
        t.rank
    FROM versions v
    INNER JOIN top_modules t
        ON v.module_path = t.module_path
    GROUP BY v.module_path, t.module_imported_by_count, t.rank
), max_versions AS (
    -- Get versions from sort versions.
    SELECT
        v.module_path,
        v.version,
        s.module_imported_by_count,
        s.rank
    FROM versions v
    INNER JOIN max_sort_versions s
        USING (module_path, sort_version)
), top_level_licenses AS (
    -- Get licenses at the module top level (file_path contains no '/').
    SELECT
        l.module_path,
        l.version,
        l.file_path,
        l.types,
        m.module_imported_by_count,
        m.rank,
        l.coverage
    FROM licenses l
    INNER JOIN max_versions m
        USING (module_path, version)
    WHERE position('/' in l.file_path) = 0
)
SELECT
    m.module_path,
    l.version,
    l.file_path,
    l.types,
    m.module_imported_by_count,
    m.rank,
    l.coverage,
    -- NULL when a top-level license exists and all its types are redistributable.
    CASE
        WHEN l.module_path IS NULL THEN 'No license'
        WHEN NOT all_redistributable(l.types) THEN 'Unsupported license'
    END AS reason_not_redistributable
FROM max_versions m
LEFT JOIN top_level_licenses l
    ON m.module_path = l.module_path;

COMMENT ON VIEW vw_module_licenses IS
'VIEW vw_module_licenses holds license information for the most popular modules.
(Those where the max imported-by count of any package in the module is over 10).
The modules are ranked by imported-by count.
The built-in rank function assigns the same rank to equal values.';

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Allow NULL in sort_version on both tables again.
ALTER TABLE module_version_states
    ALTER COLUMN sort_version DROP NOT NULL;

ALTER TABLE versions
    ALTER COLUMN sort_version DROP NOT NULL;

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Require a value in sort_version on both tables.
ALTER TABLE module_version_states
    ALTER COLUMN sort_version SET NOT NULL;

ALTER TABLE versions
    ALTER COLUMN sort_version SET NOT NULL;

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the experiments table.
DROP TABLE
    experiments;

END;

Просмотреть файл

@ -1,25 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- One row per experiment, keyed by name; rollout is constrained to 0..100.
CREATE TABLE experiments (
    name        TEXT    NOT NULL,
    rollout     INTEGER DEFAULT 0 NOT NULL CHECK (rollout >= 0 AND rollout <= 100),
    description TEXT    NOT NULL,
    PRIMARY KEY (name)
);

COMMENT ON TABLE experiments IS
'TABLE experiments contains data for running experiments.';

COMMENT ON COLUMN experiments.name IS
'COLUMN name is the name of the experiment.';

COMMENT ON COLUMN experiments.rollout IS
'COLUMN rollout is the percentage of total requests that are included for the experiment.';

COMMENT ON COLUMN experiments.description IS
'COLUMN description describes the experiment.';

END;

Просмотреть файл

@ -1,13 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Make the four parsed-version columns of versions mandatory.
ALTER TABLE versions
    ALTER COLUMN major      SET NOT NULL,
    ALTER COLUMN minor      SET NOT NULL,
    ALTER COLUMN patch      SET NOT NULL,
    ALTER COLUMN prerelease SET NOT NULL;

END;

Просмотреть файл

@ -1,13 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Make the four parsed-version columns of versions optional again.
ALTER TABLE versions
    ALTER COLUMN major      DROP NOT NULL,
    ALTER COLUMN minor      DROP NOT NULL,
    ALTER COLUMN patch      DROP NOT NULL,
    ALTER COLUMN prerelease DROP NOT NULL;

END;

Просмотреть файл

@ -1,15 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Add nullable major/minor/patch (integer) and prerelease (text) columns
-- to versions.
ALTER TABLE versions
    ADD COLUMN major      integer,
    ADD COLUMN minor      integer,
    ADD COLUMN patch      integer,
    ADD COLUMN prerelease text;

END;

Просмотреть файл

@ -1,13 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the major/minor/patch/prerelease columns from versions.
ALTER TABLE versions
    DROP COLUMN major,
    DROP COLUMN minor,
    DROP COLUMN patch,
    DROP COLUMN prerelease;

END;

Просмотреть файл

@ -1,25 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Replace alternative_module_paths with vanity_prefixes. Rows of the dropped
-- table are not copied over.
DROP TABLE alternative_module_paths;

-- canonical is the primary key and alternative is unique, so every prefix
-- appears at most once on either side.
CREATE TABLE vanity_prefixes (
    canonical TEXT NOT NULL,
    alternative TEXT NOT NULL,
    created_at TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL,
    PRIMARY KEY (canonical),
    UNIQUE(alternative)
);

COMMENT ON TABLE vanity_prefixes IS
'TABLE vanity_prefixes contains path prefixes that are known to be hosted under an alias name. (For example gocloud.dev can also be fetched from the module proxy as github.com/google/go-cloud.) It is used to filter out packages whose import paths begin with the alternative prefix from search results.';
COMMENT ON COLUMN vanity_prefixes.canonical IS
'COLUMN canonical contains the path prefix that can be found in the go.mod file of a package. For example, gocloud.dev is the canonical prefix for all packages in the modules gocloud.dev and github.com/google/go-cloud.';
COMMENT ON COLUMN vanity_prefixes.alternative IS
'COLUMN alternative contains the path prefix of packages that should be filtered out from the discovery site search results. For example, github.com/google/go-cloud is the alternative prefix for all packages in the modules gocloud.dev and github.com/google/go-cloud.';

END;

Просмотреть файл

@ -1,24 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Replace vanity_prefixes with alternative_module_paths. Rows of the dropped
-- table are not copied over.
DROP TABLE
    vanity_prefixes;

-- NOTE(review): alternative is already the primary key, so the
-- UNIQUE(alternative, canonical) constraint looks redundant -- confirm
-- before removing it.
CREATE TABLE alternative_module_paths (
    alternative TEXT NOT NULL PRIMARY KEY,
    canonical   TEXT NOT NULL,
    created_at  TIMESTAMP WITHOUT TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(alternative, canonical)
);

COMMENT ON TABLE alternative_module_paths IS
'TABLE alternative_module_paths contains module_paths that are known to have (1) a vanity import path, such as github.com/rsc/quote vs rsc.io/quote (2) a mismatch between the module path in the go.mod and repository, such as in the case of forks, or (3) a case insensitive spelling, such as in the case of github.com/sirupsen/logrus vs github.com/Sirupsen/logrus. It is used to filter out modules with the alternative path from the discovery site dataset.';

COMMENT ON COLUMN alternative_module_paths.canonical IS
'COLUMN canonical contains the module path that can be found in the go.mod file of a package. For example, gocloud.dev is the canonical prefix for all packages in gocloud.dev and github.com/google/go-cloud.';

COMMENT ON COLUMN alternative_module_paths.alternative IS
'COLUMN alternative contains the path prefix of packages that should be filtered out from the discovery site search results. For example, github.com/google/go-cloud is the alternative prefix for all packages in the modules gocloud.dev and github.com/google/go-cloud.';

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the package_version_states table.
DROP TABLE
    package_version_states;

END;

Просмотреть файл

@ -1,29 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- One row per (package_path, module_path, version). Rows are removed
-- automatically when the referenced module_version_states row is deleted.
CREATE TABLE package_version_states (
    package_path TEXT    NOT NULL,
    module_path  TEXT    NOT NULL,
    version      TEXT    NOT NULL,
    status       INTEGER NOT NULL,
    error        TEXT,
    created_at   TIMESTAMP WITHOUT TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMP WITHOUT TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (package_path, module_path, version),
    FOREIGN KEY (module_path, version)
        REFERENCES module_version_states(module_path, version)
        ON DELETE CASCADE
);

COMMENT ON TABLE package_version_states IS
'TABLE package_version_states is used to record the state of every package we have seen from the proxy.';

-- Run trigger_modify_updated_at for every inserted or updated row.
CREATE TRIGGER set_updated_at
    BEFORE INSERT OR UPDATE ON package_version_states
    FOR EACH ROW
    EXECUTE PROCEDURE trigger_modify_updated_at();

COMMENT ON TRIGGER set_updated_at ON package_version_states IS
'TRIGGER set_updated_at updates the value of the updated_at column to the current timestamp whenever a row is inserted or updated to the table.';

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the redistributable column from versions.
ALTER TABLE versions
    DROP COLUMN redistributable;

END;

Просмотреть файл

@ -1,12 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Add a nullable boolean flag to versions.
ALTER TABLE versions
    ADD COLUMN redistributable boolean;

COMMENT ON COLUMN versions.redistributable IS
'COLUMN redistributable says whether the module is redistributable.';

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the go_mod_path column from module_version_states.
ALTER TABLE module_version_states
    DROP COLUMN go_mod_path;

END;

Просмотреть файл

@ -1,12 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Add a nullable text column to module_version_states.
ALTER TABLE module_version_states
    ADD COLUMN go_mod_path text;

COMMENT ON COLUMN module_version_states.go_mod_path IS
'COLUMN go_mod_path holds the module path from the go.mod file.';

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Allow NULL in versions.redistributable.
ALTER TABLE versions
    ALTER COLUMN redistributable DROP NOT NULL;

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Require a value in versions.redistributable.
ALTER TABLE versions
    ALTER COLUMN redistributable SET NOT NULL;

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove has_go_mod from both tables that carry it.
ALTER TABLE versions
    DROP COLUMN has_go_mod;

ALTER TABLE search_documents
    DROP COLUMN has_go_mod;

END;

Просмотреть файл

@ -1,18 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- versions: nullable boolean flag.
ALTER TABLE versions
    ADD COLUMN has_go_mod boolean;

COMMENT ON COLUMN versions.has_go_mod IS
'COLUMN has_go_mod records whether the module zip contains a go.mod file.';

-- search_documents: the same flag.
ALTER TABLE search_documents
    ADD COLUMN has_go_mod boolean;

COMMENT ON COLUMN search_documents.has_go_mod IS
'COLUMN has_go_mod records whether the module zip contains a go.mod file.';

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the version_map table.
DROP TABLE
    version_map;

END;

Просмотреть файл

@ -1,33 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- One row per (module_path, requested_version). resolved_version is nullable;
-- when set, (module_path, resolved_version) must exist in versions.
-- The foreign key is named explicitly because other scripts refer to it as
-- version_map_module_path_fkey, and the name PostgreSQL generates for a
-- multi-column foreign key differs between server versions.
CREATE TABLE version_map (
    module_path TEXT NOT NULL,
    requested_version TEXT NOT NULL,
    resolved_version TEXT,
    status INTEGER NOT NULL,
    error TEXT,
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (module_path, requested_version),
    CONSTRAINT version_map_module_path_fkey
        FOREIGN KEY (module_path, resolved_version)
        REFERENCES versions(module_path, version)
);

-- Run trigger_modify_updated_at for every inserted or updated row.
CREATE TRIGGER set_updated_at BEFORE INSERT OR UPDATE ON version_map
    FOR EACH ROW EXECUTE PROCEDURE trigger_modify_updated_at();

COMMENT ON TABLE version_map IS
'TABLE version_map contains data about a user-requested path and the semantic version that it resolves to. It is used to support fetching frontend detail pages using module queries.';
COMMENT ON COLUMN version_map.resolved_version IS
'COLUMN resolved_version is the semantic version that a requested_version resolves to.';
COMMENT ON COLUMN version_map.requested_version IS
'COLUMN requested_version is the version that was requested by a user from the frontend. It may or may not resolve to a semantic version.';
COMMENT ON COLUMN version_map.status IS
'COLUMN status is the status returned by the ETL when fetching the module version.';
COMMENT ON COLUMN version_map.error IS
'COLUMN error is the error that occurred when fetching the module version, in cases when status != 200.';

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the popular_search_go_mod function.
DROP FUNCTION
    popular_search_go_mod;

END;

Просмотреть файл

@ -1,84 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- popular_search_go_mod walks search_documents in descending
-- imported_by_count order, keeps the best lim+off results seen so far, and
-- stops as soon as no remaining document can beat the lowest kept score.
--
--   rawquery       search text; parsed with websearch_to_tsquery.
--   lim, off       page size and offset; entries off+1 .. off+lim of the
--                  ranking are returned.
--   redist_factor  score multiplier used when redistributable is not true.
--   go_mod_factor  score multiplier used when has_go_mod is false
--                  (a NULL has_go_mod is treated as true).
--
-- Only rows with a non-NULL package_path and a score above 0.1 are returned.
CREATE FUNCTION popular_search_go_mod(rawquery text, lim integer, off integer, redist_factor real, go_mod_factor real) RETURNS SETOF search_result
    LANGUAGE plpgsql
    AS $$
    DECLARE cur CURSOR(query TSQUERY) FOR
        SELECT
            package_path,
            module_path,
            version,
            commit_time,
            imported_by_count,
            (
                ts_rank(tsv_search_tokens, query) *
                ln(exp(1)+imported_by_count) *
                CASE WHEN redistributable THEN 1 ELSE redist_factor END *
                CASE WHEN COALESCE(has_go_mod, true) THEN 1 ELSE go_mod_factor END *
                -- Rather than add this `tsv_search_tokens @@ query` check to a
                -- where clause, we simply annihilate the score. Adding it to the
                -- where clause caused the query planner to eventually decide to
                -- use the tsv_search_token gin index rather than the popular
                -- index, which is exactly what this stored proc is trying to
                -- avoid.
                -- It seems like this should be redundant with the ts_rank factor
                -- above, but in fact it is possible for ts_rank to be nonzero, yet
                -- tsv_search_tokens @@ query is false (I think because ts_rank doesn't
                -- have special handling for AND or OR conjunctions).
                CASE WHEN tsv_search_tokens @@ query THEN 1 ELSE 0 END
            ) score
        FROM search_documents
        -- This should use the popular document index.
        ORDER BY imported_by_count DESC;
    -- top is the top search results, sorted by score descending, commit time
    -- descending, then package_path ascending.
    top search_result[];
    -- res is the current search result.
    res search_result;
    -- last_idx is the index of the last element in top.
    last_idx INT;
    BEGIN
        last_idx := lim+off;
        top := array_fill(NULL::search_result, array[last_idx]);
        OPEN cur(query := websearch_to_tsquery(rawquery));
        FETCH cur INTO res;
        WHILE found LOOP
            IF top[last_idx] IS NULL OR res.score >= top[last_idx].score THEN
                -- Insert res into top, maintaining sort order.
                FOR i IN 1..last_idx LOOP
                    -- We want to preserve order by score desc, commit_time desc,
                    -- package_path asc, so insert res as soon as it sorted before top[i]
                    -- according to this ordering.
                    IF top[i] IS NULL OR
                        (res.score > top[i].score) OR
                        (res.score = top[i].score AND res.commit_time > top[i].commit_time) OR
                        (res.score = top[i].score AND res.commit_time = top[i].commit_time AND
                         res.package_path < top[i].package_path) THEN
                        top := (top[1:i-1] || res) || top[i:last_idx-1];
                        EXIT;
                    END IF;
                END LOOP;
            END IF;
            IF top[last_idx].score > ln(exp(1)+res.imported_by_count) THEN
                -- No subsequent document can be scored higher than our lowest scoring
                -- document, as top[last_idx].score > 1.0*ln(e+imported_by_count), and
                -- for all subsequent records ts_rank <= 1.0 and ln(e+imported_by_count)
                -- is monotonically decreasing.
                -- So we're done.
                EXIT;
            END IF;
            FETCH cur INTO res;
        END LOOP;
        CLOSE cur;
        RETURN QUERY SELECT * FROM UNNEST(top[off+1:last_idx])
            WHERE package_path IS NOT NULL AND score > 0.1;
    END; $$;

COMMENT ON FUNCTION popular_search_go_mod IS
'FUNCTION popular_search_go_mod is identical to popular_search except for the additional multiplier for the has_go_mod field.';

END;

Просмотреть файл

@ -1,14 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Tie (module_path, resolved_version) to an existing row in versions.
ALTER TABLE version_map
    ADD CONSTRAINT version_map_module_path_fkey
        FOREIGN KEY (module_path, resolved_version)
        REFERENCES versions(module_path, version);

-- Remove the sort_version column from version_map.
ALTER TABLE version_map
    DROP COLUMN sort_version;

END;

Просмотреть файл

@ -1,10 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Detach version_map from versions.
ALTER TABLE version_map
    DROP CONSTRAINT version_map_module_path_fkey;

-- Add a nullable sort_version column.
ALTER TABLE version_map
    ADD COLUMN sort_version TEXT;

END;

Просмотреть файл

@ -1,13 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove both the NOT NULL constraint and the default from status and error.
ALTER TABLE module_version_states
    ALTER COLUMN status DROP NOT NULL,
    ALTER COLUMN status DROP DEFAULT,
    ALTER COLUMN error  DROP NOT NULL,
    ALTER COLUMN error  DROP DEFAULT;

END;

Просмотреть файл

@ -1,16 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Backfill NULLs first so the SET NOT NULL actions below cannot fail on
-- existing rows.
UPDATE module_version_states
SET error = ''
WHERE error IS NULL;

UPDATE module_version_states
SET status = 0
WHERE status IS NULL;

-- status defaults to 0 and error to the empty string; both become mandatory.
ALTER TABLE module_version_states
    ALTER COLUMN status SET DEFAULT 0,
    ALTER COLUMN status SET NOT NULL,
    ALTER COLUMN error  SET NOT NULL,
    ALTER COLUMN error  SET DEFAULT '';

END;

Просмотреть файл

@ -1,10 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Allow NULL in module_version_states.go_mod_path.
ALTER TABLE module_version_states
    ALTER COLUMN go_mod_path DROP NOT NULL;

END;

Просмотреть файл

@ -1,13 +0,0 @@
-- Copyright 2019 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Backfill NULLs first so the SET NOT NULL action below cannot fail on
-- existing rows.
UPDATE module_version_states
SET go_mod_path = ''
WHERE go_mod_path IS NULL;

-- go_mod_path becomes mandatory and defaults to the empty string.
ALTER TABLE module_version_states
    ALTER COLUMN go_mod_path SET NOT NULL,
    ALTER COLUMN go_mod_path SET DEFAULT '';

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the modules view.
DROP VIEW
    modules;

END;

Просмотреть файл

@ -1,9 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- modules exposes every column of versions under a second name.
CREATE VIEW modules AS
    SELECT *
    FROM versions;

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Swap the primary key of imports for (to_path, from_path, from_version).
-- The drop and the add stay in separate statements so the old key is gone
-- before the new one is created.
ALTER TABLE imports
    DROP CONSTRAINT imports_pkey;

ALTER TABLE imports
    ADD PRIMARY KEY (to_path, from_path, from_version);

END;

Просмотреть файл

@ -1,10 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Swap the primary key of imports for
-- (to_path, from_path, from_version, from_module_path).
-- The drop and the add stay in separate statements so the old key is gone
-- before the new one is created.
ALTER TABLE imports
    DROP CONSTRAINT imports_pkey;

ALTER TABLE imports
    ADD PRIMARY KEY (to_path, from_path, from_version, from_module_path);

END;

Просмотреть файл

@ -1,16 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Rename the table, then publish a view under the old name.
ALTER TABLE modules
    RENAME TO versions;

CREATE VIEW modules AS
    SELECT *
    FROM versions;

-- Bring the index names in line with the new table name.
ALTER INDEX modules_pkey
    RENAME TO versions_pkey;
ALTER INDEX idx_modules_module_path_text_pattern_ops
    RENAME TO idx_versions_module_path_text_pattern_ops;
ALTER INDEX idx_modules_sort_version
    RENAME TO idx_versions_sort_version;
ALTER INDEX idx_modules_version_type
    RENAME TO idx_versions_version_type;

END;

Просмотреть файл

@ -1,16 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- The modules view must go before the table can take over that name.
DROP VIEW
    modules;

ALTER TABLE versions
    RENAME TO modules;

-- Bring the index names in line with the new table name.
ALTER INDEX versions_pkey
    RENAME TO modules_pkey;
ALTER INDEX idx_versions_module_path_text_pattern_ops
    RENAME TO idx_modules_module_path_text_pattern_ops;
ALTER INDEX idx_versions_sort_version
    RENAME TO idx_modules_sort_version;
ALTER INDEX idx_versions_version_type
    RENAME TO idx_modules_version_type;

END;

Просмотреть файл

@ -1,21 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
-- No explicit transaction is used here: each ALTER TABLE statement is atomic
-- on its own, and the statements are idempotent.

ALTER TABLE alternative_module_paths
    ALTER COLUMN created_at TYPE TIMESTAMP WITHOUT TIME ZONE;

ALTER TABLE excluded_prefixes
    ALTER COLUMN created_at TYPE TIMESTAMP WITHOUT TIME ZONE;

ALTER TABLE modules
    ALTER COLUMN created_at TYPE TIMESTAMP WITHOUT TIME ZONE,
    ALTER COLUMN updated_at TYPE TIMESTAMP WITHOUT TIME ZONE;

ALTER TABLE version_map
    ALTER COLUMN created_at TYPE TIMESTAMP WITHOUT TIME ZONE,
    ALTER COLUMN updated_at TYPE TIMESTAMP WITHOUT TIME ZONE;

Просмотреть файл

@ -1,20 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
-- No explicit transaction is used here: each ALTER TABLE statement is atomic
-- on its own, and the statements are idempotent.
-- NOTE(review): a second run re-evaluates `AT TIME ZONE 'UTC'` on a column
-- that is already timestamptz; that presumably leaves values unchanged only
-- when the session TimeZone is UTC -- confirm.
-- Existing values are interpreted as UTC wall-clock times during conversion.

ALTER TABLE alternative_module_paths
    ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE USING created_at AT TIME ZONE 'UTC';

ALTER TABLE excluded_prefixes
    ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE USING created_at AT TIME ZONE 'UTC';

ALTER TABLE modules
    ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE USING created_at AT TIME ZONE 'UTC',
    ALTER COLUMN updated_at TYPE TIMESTAMP WITH TIME ZONE USING updated_at AT TIME ZONE 'UTC';

ALTER TABLE version_map
    ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE USING created_at AT TIME ZONE 'UTC',
    ALTER COLUMN updated_at TYPE TIMESTAMP WITH TIME ZONE USING updated_at AT TIME ZONE 'UTC';

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Store the packages timestamps without a time zone.
ALTER TABLE packages
    ALTER COLUMN created_at TYPE TIMESTAMP WITHOUT TIME ZONE,
    ALTER COLUMN updated_at TYPE TIMESTAMP WITHOUT TIME ZONE;

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Store the packages timestamps with a time zone; existing values are
-- interpreted as UTC wall-clock times.
ALTER TABLE packages
    ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE USING created_at AT TIME ZONE 'UTC',
    ALTER COLUMN updated_at TYPE TIMESTAMP WITH TIME ZONE USING updated_at AT TIME ZONE 'UTC';

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Store the package_version_states timestamps without a time zone.
ALTER TABLE package_version_states
    ALTER COLUMN created_at TYPE TIMESTAMP WITHOUT TIME ZONE,
    ALTER COLUMN updated_at TYPE TIMESTAMP WITHOUT TIME ZONE;

END;

Просмотреть файл

@ -1,11 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Store the package_version_states timestamps with a time zone; existing
-- values are interpreted as UTC wall-clock times.
ALTER TABLE package_version_states
    ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE USING created_at AT TIME ZONE 'UTC',
    ALTER COLUMN updated_at TYPE TIMESTAMP WITH TIME ZONE USING updated_at AT TIME ZONE 'UTC';

END;

Просмотреть файл

@ -1,21 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- The index on version_updated_at is dropped before the column types change
-- and rebuilt afterwards.
DROP INDEX
    idx_search_documents_version_updated_at;

ALTER TABLE search_documents
    ALTER COLUMN created_at                   TYPE TIMESTAMP WITHOUT TIME ZONE,
    ALTER COLUMN updated_at                   TYPE TIMESTAMP WITHOUT TIME ZONE,
    ALTER COLUMN version_updated_at           TYPE TIMESTAMP WITHOUT TIME ZONE,
    ALTER COLUMN imported_by_count_updated_at TYPE TIMESTAMP WITHOUT TIME ZONE;

CREATE INDEX idx_search_documents_version_updated_at
    ON search_documents USING btree (version_updated_at);

COMMENT ON INDEX idx_search_documents_version_updated_at IS
'INDEX idx_search_documents_version_updated_at is used for incremental update of imported_by counts, in order to determine when the latest version of a package was last updated.';

END;

Просмотреть файл

@ -1,19 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- The index on version_updated_at is dropped before the column types change
-- and rebuilt afterwards. Existing values are interpreted as UTC wall-clock
-- times.
DROP INDEX
    idx_search_documents_version_updated_at;

ALTER TABLE search_documents
    ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE USING created_at AT TIME ZONE 'UTC',
    ALTER COLUMN updated_at TYPE TIMESTAMP WITH TIME ZONE USING updated_at AT TIME ZONE 'UTC',
    ALTER COLUMN version_updated_at TYPE TIMESTAMP WITH TIME ZONE USING version_updated_at AT TIME ZONE 'UTC',
    ALTER COLUMN imported_by_count_updated_at TYPE TIMESTAMP WITH TIME ZONE USING imported_by_count_updated_at AT TIME ZONE 'UTC';

CREATE INDEX idx_search_documents_version_updated_at
    ON search_documents USING btree (version_updated_at);

COMMENT ON INDEX idx_search_documents_version_updated_at IS
'INDEX idx_search_documents_version_updated_at is used for incremental update of imported_by counts, in order to determine when the latest version of a package was last updated.';

END;

Просмотреть файл

@ -1,75 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Partial indexes on package_path, restricted to documents whose
-- imported_by_count exceeds 8 and 50 respectively.
CREATE INDEX idx_imported_by_count_gt_8
    ON search_documents USING btree (package_path)
    WHERE (imported_by_count > 8);

CREATE INDEX idx_imported_by_count_gt_50
    ON search_documents USING btree (package_path)
    WHERE (imported_by_count > 50);

-- all_redistributable returns true when every element of types is contained
-- in the list below. A NULL argument yields false (via COALESCE).
CREATE FUNCTION all_redistributable(types text[]) RETURNS boolean
    LANGUAGE sql
    AS $$
    SELECT COALESCE(types <@ ARRAY['AGPL-3.0', 'Apache-2.0', 'Artistic-2.0', 'BSD-2-Clause',
    'BSD-3-Clause', 'BSL-1.0', 'GPL2', 'GPL3', 'ISC', 'LGPL-2.1',
    'LGPL-3.0', 'MIT', 'MPL-2.0', 'Zlib'], false);
$$;

-- vw_module_licenses lists, for modules whose highest imported_by_count is
-- above 10, the top-level license files of the row with the greatest
-- sort_version, together with the reason (if any) the module is not
-- redistributable.
CREATE VIEW vw_module_licenses AS
    WITH top_modules AS (
        -- Modules other than 'std', ranked by their highest imported_by_count.
        SELECT a.module_path,
            a.module_imported_by_count,
            a.redistributable,
            rank() OVER (ORDER BY a.module_imported_by_count DESC) AS rank
        FROM ( SELECT search_documents.module_path,
                    max(search_documents.imported_by_count) AS module_imported_by_count,
                    search_documents.redistributable
                FROM search_documents
                WHERE (search_documents.module_path <> 'std'::text)
                GROUP BY search_documents.module_path, search_documents.redistributable
                ORDER BY (max(search_documents.imported_by_count)) DESC) a
        WHERE (a.module_imported_by_count > 10)
    ), max_sort_versions AS (
        -- Greatest sort_version per top module.
        SELECT v.module_path,
            max(v.sort_version) AS sort_version,
            t.module_imported_by_count,
            t.rank
        FROM modules v,
            top_modules t
        WHERE (v.module_path = t.module_path)
        GROUP BY v.module_path, t.module_imported_by_count, t.rank
    ), max_versions AS (
        -- Map each greatest sort_version back to its version string.
        SELECT v.module_path,
            v.version,
            s.module_imported_by_count,
            s.rank
        FROM (modules v
            JOIN max_sort_versions s USING (module_path, sort_version))
    ), top_level_licenses AS (
        -- License files whose path contains no '/'.
        SELECT l_1.module_path,
            l_1.version,
            l_1.file_path,
            l_1.types,
            m_1.module_imported_by_count,
            m_1.rank,
            l_1.coverage
        FROM (licenses l_1
            JOIN max_versions m_1 USING (module_path, version))
        WHERE ("position"(l_1.file_path, '/'::text) = 0)
    )
    SELECT m.module_path,
        l.version,
        l.file_path,
        l.types,
        m.module_imported_by_count,
        m.rank,
        l.coverage,
        CASE
            WHEN (l.module_path IS NULL) THEN 'No license'::text
            WHEN (NOT all_redistributable(l.types)) THEN 'Unsupported license'::text
            ELSE NULL::text
        END AS reason_not_redistributable
    FROM (max_versions m
        LEFT JOIN top_level_licenses l ON ((m.module_path = l.module_path)));

END;

Просмотреть файл

@ -1,12 +0,0 @@
-- Copyright 2020 The Go Authors. All rights reserved.
-- Use of this source code is governed by a BSD-style
-- license that can be found in the LICENSE file.
BEGIN;

-- Remove the two partial indexes on search_documents.
DROP INDEX
    idx_imported_by_count_gt_8;
DROP INDEX
    idx_imported_by_count_gt_50;

-- The view is removed before the function.
DROP VIEW
    vw_module_licenses;
DROP FUNCTION
    all_redistributable;

END;