ldap-teamsync/app.py

403 строки
13 KiB
Python
Исходник Постоянная ссылка Обычный вид История

2020-07-01 06:32:44 +03:00
import atexit
from operator import truediv
2020-06-18 23:48:01 +03:00
import os
2020-07-01 06:32:44 +03:00
import time
2021-03-25 06:36:24 +03:00
import json
import github3
2020-07-01 06:32:44 +03:00
from distutils.util import strtobool
import threading
import sys
import traceback
from concurrent.futures import ThreadPoolExecutor
2020-07-01 06:32:44 +03:00
2020-06-20 01:00:02 +03:00
from apscheduler.schedulers.background import BackgroundScheduler
2020-07-01 01:08:15 +03:00
from apscheduler.triggers.cron import CronTrigger
2020-07-01 06:32:44 +03:00
from flask import Flask
from githubapp import (
GitHubApp,
DirectoryClient,
CRON_INTERVAL,
TEST_MODE,
ADD_MEMBER,
2022-05-11 16:04:31 +03:00
REMOVE_ORG_MEMBERS_WITHOUT_TEAM,
USER_SYNC_ATTRIBUTE,
SYNCMAP_ONLY,
)
2020-06-17 17:43:47 +03:00
# Flask application and the GitHub App wrapper bound to it.
app = Flask(__name__)
github_app = GitHubApp(app)

# Schedule a full sync
# The background scheduler is started at import time; jobs are attached to it
# further down via ``scheduler.scheduled_job``.
scheduler = BackgroundScheduler(daemon=True)
scheduler.start()
# On interpreter exit, shut the scheduler down without waiting for jobs.
atexit.register(scheduler.shutdown, wait=False)
2020-07-01 07:24:37 +03:00
2021-03-22 07:55:00 +03:00
@github_app.on("team.created")
def sync_new_team():
    """
    Sync a new team when it is created

    Handler registered for the ``team.created`` event. Reads the organization
    login and the team id/name/slug from ``github_app.payload`` and hands them
    to ``sync_team`` together with the installation client.

    :return:
    """
    payload = github_app.payload
    owner = payload["organization"]["login"]
    team_id = payload["team"]["id"]
    # Fall back to "LDAP" when USER_DIRECTORY is unset (the same default that
    # execute_sync uses) instead of failing the handler with a KeyError.
    if os.environ.get("USER_DIRECTORY", "LDAP").upper() == "AAD":
        # Azure APIs don't currently support case insensitive searching
        slug = payload["team"]["name"].replace(" ", "-")
    else:
        slug = payload["team"]["slug"]
    client = github_app.installation_client
    sync_team(client=client, owner=owner, team_id=team_id, slug=slug)
2020-06-20 01:00:02 +03:00
2020-07-01 00:33:46 +03:00
def sync_team(client=None, owner=None, team_id=None, slug=None):
    """
    Prepare the team sync

    Resolves the directory group for the team, collects the directory and
    GitHub members, computes the difference and then either prints the
    pending changes (``TEST_MODE``) or applies them with ``execute_sync``.

    :param client: client used to look up the organization and its members
    :param owner: organization login passed to ``client.organization``
    :param team_id: id of the team passed to ``org.team``
    :param slug: team slug used for the directory lookup and in log messages
    :return: None
    :raises Exception: every failure is printed to stderr and re-raised; a
        failure of ``execute_sync`` is wrapped in an ``Exception`` that names
        the team and is chained to the original error
    """
    print("-------------------------------")
    print(f"Processing Team: {slug}")
    try:
        org = client.organization(owner)
        team = org.team(team_id)
        custom_map, group_prefix, ignore_users = load_custom_map()
        # Normalise the prefix filter: a bare string would otherwise be split
        # into single characters by tuple(), and None means "no filter".
        if isinstance(group_prefix, str):
            prefixes = (group_prefix,)
        else:
            prefixes = tuple(group_prefix or ())
        try:
            directory_group = get_directory_from_slug(slug, custom_map, org)
            # If we're filtering on group prefix, skip if the group doesn't match
            if prefixes and not directory_group.startswith(prefixes):
                print(f"skipping team {team.slug} - not in group prefix")
                return
            directory_members = directory_group_members(group=directory_group)
        except Exception:
            # Best effort: a failed directory lookup is logged and treated as
            # an empty group; execute_sync refuses to act on an empty group.
            directory_members = []
            traceback.print_exc(file=sys.stderr)

        team_members = github_team_members(
            client=client,
            owner=owner,
            team_id=team_id,
            attribute=USER_SYNC_ATTRIBUTE,
            ignore_users=ignore_users,
        )
        compare = compare_members(
            group=directory_members, team=team_members, attribute=USER_SYNC_ATTRIBUTE
        )
        if TEST_MODE:
            print(f"TEST_MODE: Pending changes for team {team.slug}:")
            print(json.dumps(compare, indent=2))
        else:
            try:
                execute_sync(org=org, team=team, slug=slug, state=compare)
            except (AssertionError, ValueError) as e:
                # An unset OPEN_ISSUE_ON_FAILURE means "do not open an issue";
                # a KeyError here would hide the actual sync failure.
                if strtobool(os.environ.get("OPEN_ISSUE_ON_FAILURE", "false")):
                    open_issue(client=client, slug=slug, message=e)
                raise Exception(f"Team {team.slug} sync failed: {e}") from e
        print(f"Processing Team Successful: {team.slug}")
    except Exception:
        traceback.print_exc(file=sys.stderr)
        raise
2020-06-19 00:58:56 +03:00
2020-06-17 17:43:47 +03:00
2021-03-22 07:13:27 +03:00
def directory_group_members(group=None):
    """
    Look up members of a group in your user directory

    Any error raised while querying the directory is printed to stderr and
    an empty list is returned instead.

    :param group: The name of the group to query in your directory server
    :type group: str
    :return: group_members
    :rtype: list
    """
    group_members = []
    try:
        client = DirectoryClient()
        group_members = list(client.get_group_members(group_name=group))
    except Exception:
        traceback.print_exc(file=sys.stderr)
    return group_members
2020-06-17 17:43:47 +03:00
2020-06-19 21:38:37 +03:00
2020-07-02 03:19:12 +03:00
def github_team_info(client=None, owner=None, team_id=None):
    """
    Look up team info in GitHub

    :param client: client providing ``organization()``
    :param owner: organization login handed to ``client.organization``
    :param team_id: id handed to the organization's ``team()`` lookup
    :return: whatever ``org.team(team_id)`` returns
    """
    return client.organization(owner).team(team_id)
2020-06-17 17:43:47 +03:00
2020-06-19 21:38:37 +03:00
2022-05-26 15:40:13 +03:00
def github_team_members(
    client=None, owner=None, team_id=None, attribute="username", ignore_users=None
):
    """
    Look up members of a given team in GitHub

    :param client: client used to resolve the team and, for the ``email``
        attribute, each member's user record
    :param owner: organization login
    :param team_id: id of the team to list
    :param attribute: ``"email"`` fetches every member's user record to read
        its email; any other value only records the username
    :param ignore_users: usernames to leave out of the result; ``None`` (the
        default) or an empty collection ignores nobody
    :type owner: str
    :type team_id: int
    :type attribute: str
    :return: team_members, a list of ``{"username": ..., "email": ...}`` dicts
    :rtype: list
    """
    # ``None`` replaces the former mutable ``[]`` default and also covers a
    # caller passing None explicitly.
    ignored = tuple(ignore_users or ())
    team = github_team_info(client=client, owner=owner, team_id=team_id)
    if attribute == "email":
        team_members = []
        for m in team.members():
            user = client.user(m.login)
            # NOTE(review): str() turns a missing email (None) into the text
            # "None" -- confirm that is what the comparison step expects.
            team_members.append(
                {
                    "username": str(user.login),
                    "email": str(user.email),
                }
            )
    else:
        team_members = [
            {"username": str(member), "email": ""} for member in team.members()
        ]
    return [m for m in team_members if m["username"] not in ignored]
2020-06-17 17:43:47 +03:00
2021-03-22 07:55:00 +03:00
def compare_members(group, team, attribute="username"):
    """
    Compare users in GitHub and the User Directory to see which users need to be added or removed

    Members are matched on the case-folded value of ``attribute``. The
    ``add`` and ``remove`` lists are sorted so that the result (and anything
    printed or reported from it) is the same on every run.

    :param group: directory members, each a mapping containing ``attribute``
    :param team: GitHub team members, each a mapping containing ``attribute``
    :param attribute: key used to match members between the two lists
    :return: sync_state with keys ``directory`` (``group`` as passed),
        ``github`` (``team`` as passed) and ``action`` (``add``/``remove``)
    :rtype: dict
    """
    directory_ids = {member[attribute].casefold() for member in group}
    github_ids = {member[attribute].casefold() for member in team}
    sync_state = {
        "directory": group,
        "github": team,
        "action": {
            # In the directory but not in the team.
            "add": sorted(directory_ids - github_ids),
            # In the team but not in the directory.
            "remove": sorted(github_ids - directory_ids),
        },
    }
    return sync_state
2020-06-17 17:43:47 +03:00
2020-06-19 22:58:55 +03:00
def execute_sync(org, team, slug, state):
    """
    Perform the synchronization

    Refuses to act when the directory list is empty or when the number of
    pending changes is above ``CHANGE_THRESHOLD`` (25 when unset). Otherwise
    adds the pending users to the team and then revokes the pending removals.

    :param org: organization object, used to check membership before adding
    :param team: team object whose memberships are added/revoked
    :param slug: team slug, used in log and error messages
    :param state: sync state with ``directory`` and ``action`` entries
    :return: None
    :raises ValueError: when ``state["directory"]`` is empty
    :raises AssertionError: when the change count exceeds the threshold
    """
    actions = state["action"]
    pending_removals = actions["remove"]
    pending_additions = actions["add"]
    total_changes = len(pending_removals) + len(pending_additions)

    # Guard 1: never sync against an empty directory group.
    if len(state["directory"]) == 0:
        message = f"{os.environ.get('USER_DIRECTORY', 'LDAP').upper()} group returned empty: {slug}"
        raise ValueError(message)

    # Guard 2: too many changes at once looks suspicious, so bail out.
    threshold = os.environ.get("CHANGE_THRESHOLD", 25)
    if int(total_changes) > int(threshold):
        message = "Skipping sync for {}.<br>".format(slug)
        message += "Total number of changes ({}) would exceed the change threshold ({}).".format(
            str(total_changes), str(threshold)
        )
        message += "<br>Please investigate this change and increase your threshold if this is accurate."
        raise AssertionError(message)

    for user in pending_additions:
        # Validate that user is in org
        if not (org.is_member(user) or ADD_MEMBER):
            print(f"Skipping {user} as they are not part of the org")
            continue
        try:
            print(f"Adding {user} to {slug}")
            team.add_or_update_membership(user)
        except github3.exceptions.NotFoundError:
            print(f"User: {user} not found")

    for user in pending_removals:
        print(f"Removing {user} from {slug}")
        team.revoke_membership(user)
2020-07-01 00:33:46 +03:00
def open_issue(client, slug, message):
    """
    Open an issue with the failed sync details

    The target repository is read from ``REPO_FOR_ISSUES`` (``owner/repo``)
    and the assignee from ``ISSUE_ASSIGNEE``.

    :param client: Our installation client
    :param slug: Team slug
    :param message: Error message to detail
    :return:
    """
    repo_parts = os.environ["REPO_FOR_ISSUES"].split("/")
    owner = repo_parts[0]
    repository = repo_parts[1]
    assignee = os.environ["ISSUE_ASSIGNEE"]
    issue_title = "Team sync failed for @{}/{}".format(owner, slug)
    client.create_issue(
        owner=owner,
        repository=repository,
        assignee=assignee,
        title=issue_title,
        body=str(message),
    )
2020-06-30 22:30:09 +03:00
2020-07-01 07:24:37 +03:00
2021-03-22 07:55:00 +03:00
def load_custom_map(file="syncmap.yml"):
    """
    Custom team synchronization

    Reads the optional sync map file. When the file does not exist, empty
    defaults are returned for all three values.

    :param file: path of the YAML sync map
    :return: tuple ``(syncmap, group_prefix, ignore_users)``; ``syncmap`` maps
        either ``(org, github)`` or ``github`` to the ``directory`` value of
        each mapping entry
    """
    syncmap = {}
    # Initialised up front so a missing file no longer raises
    # UnboundLocalError at the return statement.
    group_prefix = []
    ignore_users = []
    if os.path.isfile(file):
        from yaml import load, Loader

        with open(file, "r") as f:
            # NOTE(review): the full ``Loader`` can build arbitrary Python
            # objects; consider ``SafeLoader`` if this file is ever not fully
            # trusted. An empty file loads as None, hence the ``or {}``.
            data = load(f, Loader=Loader) or {}
        for d in data.get("mapping") or []:
            if "org" in d:
                syncmap[(d["org"], d["github"])] = d["directory"]
            else:
                syncmap[d["github"]] = d["directory"]
        # ``or []`` also covers keys that are present but left empty (null).
        group_prefix = data.get("group_prefix") or []
        ignore_users = data.get("ignore_users") or []
    return (syncmap, group_prefix, ignore_users)
2020-07-02 03:19:12 +03:00
def get_app_installations():
    """
    Get a list of installations for this app

    :return: the ``app_installations`` attribute of the app client; it is
        not called here (callers in this file invoke it to iterate)
    """
    with app.app_context() as ctx:
        try:
            gh = GitHubApp(ctx.push())
            installations = gh.app_client.app_installations
        finally:
            # Undo the explicit push above.
            ctx.pop()
    return installations
2021-03-22 07:55:00 +03:00
@scheduler.scheduled_job(
    trigger=CronTrigger.from_crontab(CRON_INTERVAL), id="sync_all_teams"
)
def sync_all_teams():
    """
    Lookup teams in a GitHub org and synchronize all teams with your user directory

    Registered as a scheduled job using the ``CRON_INTERVAL`` crontab. Every
    team of every installation is submitted to a thread pool; afterwards the
    results are collected and, when ``REMOVE_ORG_MEMBERS_WITHOUT_TEAM`` is
    set, org members without a team are removed.

    :return:
    :raises Exception: when the app has no installations
    """
    print(f'Syncing all teams: {time.strftime("%A, %d. %B %Y %I:%M:%S %p")}')

    installations = get_app_installations()
    # load_custom_map returns (syncmap, group_prefix, ignore_users); only the
    # map is needed here. Star-unpacking avoids the "too many values to
    # unpack" error the previous two-name unpack raised.
    custom_map, *_ = load_custom_map()
    futures = []
    install_count = 0
    with ThreadPoolExecutor(max_workers=10) as exe:
        for i in installations():
            install_count += 1
            print("========================================================")
            print(f"## Processing Organization: {i.account['login']}")
            print("========================================================")
            with app.app_context() as ctx:
                try:
                    gh = GitHubApp(ctx.push())
                    client = gh.app_installation(installation_id=i.id)
                    org = client.organization(i.account["login"])
                    for team in org.teams():
                        futures.append(
                            exe.submit(sync_team_helper, team, custom_map, client, org)
                        )
                except Exception as e:
                    # Best effort per installation: log and move on.
                    print(f"DEBUG: {e}")
                finally:
                    ctx.pop()
    if not install_count:
        raise Exception(f"No installation defined for APP_ID {os.getenv('APP_ID')}")
    for future in futures:
        future.result()
    if REMOVE_ORG_MEMBERS_WITHOUT_TEAM:
        remove_org_members_without_team(installations)
    print(f'Syncing all teams successful: {time.strftime("%A, %d. %B %Y %I:%M:%S %p")}')
2022-05-13 21:33:47 +03:00
def remove_org_members_without_team(installations):
    """
    Remove organization members that do not belong to any team

    For every installation, the members of the organization are compared
    with the members of all its teams. Each member found in no team is
    printed and, unless ``TEST_MODE`` is set, removed from the organization.
    Errors are printed per installation and do not stop the loop.

    :param installations: callable returning the installations to process
    :return:
    """
    for installation in installations():
        with app.app_context() as ctx:
            try:
                gh = GitHubApp(ctx.push())
                client = gh.app_installation(installation_id=installation.id)
                org = client.organization(installation.account["login"])
                org_members = list(org.members())
                team_members = []
                for team in org.teams():
                    team_members.extend(team.members())
                for member in set(org_members) - set(team_members):
                    print(f"Removing {member}")
                    if TEST_MODE:
                        continue
                    org.remove_membership(str(member))
            except Exception as e:
                print(f"DEBUG: {e}")
            finally:
                ctx.pop()
def sync_team_helper(team, custom_map, client, org):
    """
    Sync one team, logging instead of raising on failure

    When ``SYNCMAP_ONLY`` is set, teams that are not in the sync map are
    skipped. Any exception from the sync is printed and swallowed.

    :param team: team object providing ``id`` and ``slug``
    :param custom_map: sync map used for the ``SYNCMAP_ONLY`` check
    :param client: client passed on to ``sync_team``
    :param org: organization object providing ``login``
    :return:
    """
    print(f"Organization: {org.login}")
    try:
        eligible = not SYNCMAP_ONLY or is_team_in_map(team.slug, custom_map, org)
        if eligible:
            sync_team(
                client=client,
                owner=org.login,
                team_id=team.id,
                slug=team.slug,
            )
        else:
            print(f"skipping team {team.slug} - not in sync map")
    except Exception as e:
        print(f"Organization: {org.login}")
        print(f"Unable to sync team: {team.slug}")
        print(f"DEBUG: {e}")
2021-04-07 11:16:53 +03:00
def is_team_in_map(slug, custom_map, org):
    """
    Tell whether a team has an entry in the sync map

    :param slug: team slug
    :param custom_map: sync map keyed by ``(org login, slug)`` or by ``slug``
    :param org: organization object providing ``login``
    :return: True when either key form is present in ``custom_map``
    :rtype: bool
    """
    candidate_keys = ((org.login, slug), slug)
    return any(key in custom_map for key in candidate_keys)
2023-05-18 23:41:04 +03:00
def get_directory_from_slug(slug, custom_map, org):
    """
    Resolve the directory group name for a team slug

    The org-qualified entry ``(org login, slug)`` wins over the plain
    ``slug`` entry; without any entry the slug itself is returned.

    :param slug: team slug
    :param custom_map: sync map keyed by ``(org login, slug)`` or by ``slug``
    :param org: organization object providing ``login``
    :return: mapped directory value, or ``slug`` when unmapped
    """
    org_key = (org.login, slug)
    if org_key in custom_map:
        return custom_map[org_key]
    if slug in custom_map:
        return custom_map[slug]
    return slug
2023-08-17 19:46:00 +03:00
# When FLASK_APP is set, start one full sync in a separate thread at import
# time.
if os.environ.get("FLASK_APP") is not None:
    thread = threading.Thread(target=sync_all_teams)
    thread.start()
2020-07-01 00:33:46 +03:00
2021-03-22 07:55:00 +03:00
# Script entry point: without FLASK_APP run a single full sync and exit;
# with FLASK_APP serve the Flask app on the configured host and port.
if __name__ == "__main__":
    if "FLASK_APP" not in os.environ:
        sync_all_teams()
    else:
        run_host = os.environ.get("FLASK_RUN_HOST", "0.0.0.0")
        run_port = os.environ.get("FLASK_RUN_PORT", "5000")
        app.run(host=run_host, port=run_port)