Add options to the cli for passing in arguments to postgres db

2019-11-15 13:47:44 -08:00 · 2019-11-15 13:47:44 -08:00 · 30bfe79f5f
--- a/mozaggregator/cli.py
+++ b/mozaggregator/cli.py
@ -23,6 +23,11 @@ def entry_point():
 )
@click.option("--credentials-bucket", type=str, required=False)
@click.option("--credentials-prefix", type=str, required=False)
@click.option("--postgres-db", type=str, required=False)
@click.option("--postgres-user", type=str, required=False)
@click.option("--postgres-pass", type=str, required=False)
@click.option("--postgres-host", type=str, required=False)
@click.option("--postgres-ro-host", type=str, required=False)
@click.option("--num-partitions", type=int, default=10000)
@click.option(
    "--source",
@ -40,6 +45,11 @@ def run_aggregator(
    credentials_protocol,
    credentials_bucket,
    credentials_prefix,
    postgres_db,
    postgres_user,
    postgres_pass,
    postgres_host,
    postgres_ro_host,
    num_partitions,
    source,
    project_id,
@ -55,12 +65,22 @@ def run_aggregator(
        mapping = {"file": "file", "s3": "s3a", "gcs": "gs"}
        return f"{mapping[protocol]}://{bucket}/{prefix}"
-    if credentials_bucket and credentials_prefix:
+    # priority of reading credentials is options > credentials file > environment
    option_credentials = {
        "POSTGRES_DB": postgres_db,
        "POSTGRES_USER": postgres_user,
        "POSTGRES_PASS": postgres_pass,
        "POSTGRES_HOST": postgres_host,
        "POSTGRES_RO_HOST": postgres_ro_host,
    }
    if all(option_credentials.values()):
        print("reading credentials from options")
        environ.update(option_credentials)
    elif credentials_bucket and credentials_prefix:
        path = create_path(credentials_protocol, credentials_bucket, credentials_prefix)
        print(f"reading credentials from {path}")
        creds = spark.read.json(path, multiLine=True).first().asDict()
-        for k, v in creds.items():
+        environ.update(creds)
            environ[k] = v
    else:
        print(f"assuming credentials from the environment")
--- a/tests/test_db.py
+++ b/tests/test_db.py
@ -134,16 +134,8 @@ def test_notice_logging_cursor():
    lc.check(expected)
-def test_aggregation_cli(tmp_path, monkeypatch, spark):
+@pytest.fixture
-    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "access")
+def mock_dataset(monkeypatch, spark):
    monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "secret")
    test_creds = str(tmp_path / "creds")
    # generally points to the production credentials
    creds = {"DB_TEST_URL": "dbname=postgres user=postgres host=db"}
    with open(test_creds, "w") as f:
        json.dump(creds, f)
    class Dataset:
        @staticmethod
        def from_source(*args, **kwargs):
@ -157,6 +149,14 @@ def test_aggregation_cli(tmp_path, monkeypatch, spark):
    monkeypatch.setattr("mozaggregator.aggregator.Dataset", Dataset)
 def test_aggregation_cli(tmp_path, mock_dataset):
    test_creds = str(tmp_path / "creds")
    # generally points to the production credentials
    creds = {"DB_TEST_URL": "dbname=postgres user=postgres host=db"}
    with open(test_creds, "w") as f:
        json.dump(creds, f)
    result = CliRunner().invoke(
        run_aggregator,
        [
@ -180,20 +180,7 @@ def test_aggregation_cli(tmp_path, monkeypatch, spark):
    assert_new_db_functions_backwards_compatible()
-def test_aggregation_cli_no_credentials_file(monkeypatch, spark):
+def test_aggregation_cli_no_credentials_file(mock_dataset):
    class Dataset:
        @staticmethod
        def from_source(*args, **kwargs):
            return Dataset()
        def where(self, *args, **kwargs):
            return self
        def records(self, *args, **kwargs):
            return spark.sparkContext.parallelize(generate_pings())
    monkeypatch.setattr("mozaggregator.aggregator.Dataset", Dataset)
    result = CliRunner().invoke(
        run_aggregator,
        [
@ -219,6 +206,61 @@ def test_aggregation_cli_no_credentials_file(monkeypatch, spark):
    assert_new_db_functions_backwards_compatible()
 def test_aggregation_cli_credentials_option(mock_dataset):
    empty_env = {
        "DB_TEST_URL": "",
        "POSTGRES_DB": "",
        "POSTGRES_USER": "",
        "POSTGRES_PASS": "",
        "POSTGRES_HOST": "",
        "POSTGRES_RO_HOST": ","
    }
    options = [
        "--postgres-db",
        "postgres",
        "--postgres-user",
        "postgres",
        "--postgres-pass",
        "pass",
        "--postgres-host",
        "db",
        "--postgres-ro-host",
        "db"
    ]
    result = CliRunner().invoke(
        run_aggregator,
        [
            "--date",
            SUBMISSION_DATE_1.strftime('%Y%m%d'),
            "--channels",
            "nightly,beta",
            "--num-partitions",
            10,
        ] + options,
        env=empty_env,
        catch_exceptions=False,
    )
    assert result.exit_code == 0, result.output
    assert_new_db_functions_backwards_compatible()
    # now test that missing an option will exit with non-zero
    result = CliRunner().invoke(
        run_aggregator,
        [
            "--date",
            SUBMISSION_DATE_1.strftime('%Y%m%d'),
            "--channels",
            "nightly,beta",
            "--num-partitions",
            10,
        ] + options[:2], # missing ro_host
        env=empty_env,
        catch_exceptions=False,
    )
    assert result.exit_code == 1
@runif_bigquery_testing_enabled
 def test_aggregation_cli_bigquery(tmp_path, bq_testing_table):
    test_creds = str(tmp_path / "creds")