Add Jupyter Notebook for user searches.
See the README -- this is an overly broad search, you need to double check before assuming any match is valid.
This commit is contained in:
Родитель
056118b0b2
Коммит
9e7fa67268
|
@ -6,6 +6,15 @@ not yet been released.** See
|
|||
|
||||
These are some API helper scripts for sanely managing a GitHub org. For now this is somewhat hardcoded for the mozilla org; there is no need for it to remain that way, though.
|
||||
|
||||
## Jupyter Notebooks
|
||||
### User Search.ipynb
|
||||
Given a set of possible GitHub logins, determine if they might have any
|
||||
permissions in various organizations. Links are provided for hits, so they are easy to
|
||||
examine more closely.
|
||||
|
||||
N.B.: Both this script and the GitHub search interface make assumptions. It is
|
||||
*your* responsibility to ensure any possible match is a valid match.
|
||||
|
||||
## Scripts
|
||||
### auditlog.py
|
||||
Download the audit log for $ORG using headless Firefox driven by Selenium
|
||||
|
|
|
@ -0,0 +1,371 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# User Search\n",
|
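||||
"Use this notebook to:\n",
|
||||
"1. Try to find a GitHub account from whatever is known about a person (name, email addresses, IM handles)\n",
|
||||
"2. List the orgs (from the configured subset) where that account is a member or outside collaborator\n",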
|
||||
" - You will need org owner permissions to perform these searches"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"heading_collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"## Boilerplate\n",
|
||||
"Skip/hide this. Common usage is below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"hidden": true
|
||||
},
|
||||
"source": [
|
||||
"If you see this text, you may want to enable the nbextension \"Collapsible Headings\", so you can hide this section in common usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set values here - you can also override below\n",
|
||||
"api_key = open(\".credentials\", \"r\").readlines()[1].strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"orgs_to_check = [ \"mozilla\"\n",
|
||||
" , \"mozilla-services\"\n",
|
||||
" , \"mozilla-mobile\"\n",
|
||||
" , \"mozilla-partners\"\n",
|
||||
" , \"taskcluster\"\n",
|
||||
" , \"mozilla-conduit\"\n",
|
||||
" , \"mozilla-platform-ops\"\n",
|
||||
" , \"nss-dev\"\n",
|
||||
" , \"mozilla-releng\"\n",
|
||||
" , \"mozilla-private\"\n",
|
||||
" , \"mozilla-frontend-infra\"\n",
|
||||
" , \"mozilla-bteam\"\n",
|
||||
" , \"iodide-project\"\n",
|
||||
" , \"mozilla-games\"\n",
|
||||
" , \"mozillaReality\"\n",
|
||||
" , \"mozilla-standards\"\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import github3\n",
|
||||
"def print_limits():\n",
|
||||
" print(\"reset at: {}, remaining {}\".format(gh.rate_limit()[\"rate\"][\"reset\"], gh.rate_limit()[\"rate\"][\"remaining\"]))\n",
|
||||
"try:\n",
|
||||
" gh = github3.login(token=api_key)\n",
|
||||
" print(\"You are authenticated as {}\".format(gh.me().login))\n",
|
||||
"except ConnectionError:\n",
|
||||
" print_limits()\n",
|
||||
"try:\n",
|
||||
" from functools import lru_cache\n",
|
||||
"except ImportError:\n",
|
||||
" from backports.functools_lru_cache import lru_cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"hidden": true
|
||||
},
|
||||
"source": [
|
||||
"From here on, use ``gh`` to access all data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@lru_cache(maxsize=32)\n",
|
||||
"def _search_for_user(user):\n",
|
||||
" l = list(gh.search_users(query=\"type:user \"+user))\n",
|
||||
" print(\"found {} potentials for {}\".format(len(l), user))\n",
|
||||
" return l\n",
|
||||
"\n",
|
||||
"def get_user_counts(user):\n",
|
||||
" l = _search_for_user(user)\n",
|
||||
" for u in l:\n",
|
||||
" yield u\n",
|
||||
" \n",
|
||||
" if '@' in user:\n",
|
||||
" l2 = _search_for_user(user.split('@')[0])\n",
|
||||
" for u in l2:\n",
|
||||
" yield u\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"displayed_users = set()\n",
|
||||
"def show_users(user_list):\n",
|
||||
" global displayed_users\n",
|
||||
" unique_users = set(user_list)\n",
|
||||
" count = len(unique_users)\n",
|
||||
" if count >10:\n",
|
||||
" print(\"... too many to be useful ...\")\n",
|
||||
" else:\n",
|
||||
" for u in [x for x in unique_users if not x in displayed_users]:\n",
|
||||
" displayed_users.add(u)\n",
|
||||
" user = u.user\n",
|
||||
" user.refresh()\n",
|
||||
" print(user.login, user.name, user.location, user.email)\n",
|
||||
" if 0 < count <= 10:\n",
|
||||
" return [u.login for u in unique_users]\n",
|
||||
" else:\n",
|
||||
" return []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class OutsideCollaboratorIterator(github3.structs.GitHubIterator):\n",
|
||||
" def __init__(self, org):\n",
|
||||
" super(OutsideCollaboratorIterator, self).__init__(\n",
|
||||
" count=-1, #get all\n",
|
||||
" url=org.url + \"/outside_collaborators\",\n",
|
||||
" cls=github3.users.ShortUser,\n",
|
||||
" session=org.session,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"@lru_cache(maxsize=32)\n",
|
||||
"def get_collaborators(org):\n",
|
||||
" collabs = [x.login.lower() for x in OutsideCollaboratorIterator(org)]\n",
|
||||
" return collabs\n",
|
||||
"\n",
|
||||
"def is_collaborator(org, login):\n",
|
||||
" return bool(login.lower() in get_collaborators(org))\n",
|
||||
"\n",
|
||||
"# provide same interface for members -- but the iterator is free :D\n",
|
||||
"@lru_cache(maxsize=32)\n",
|
||||
"def get_members(org):\n",
|
||||
" collabs = [x.login.lower() for x in org.members()]\n",
|
||||
" return collabs\n",
|
||||
"\n",
|
||||
"def is_member(org, login):\n",
|
||||
" return bool(login.lower() in get_members(org))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def check_login_perms(logins):\n",
|
||||
" any_perms = False\n",
|
||||
" for login in logins:\n",
|
||||
" is_collab = False\n",
|
||||
" for org in orgs_to_check:\n",
|
||||
" o = gh.organization(org)\n",
|
||||
" if is_member(o, login):\n",
|
||||
" url = \"https://github.com/orgs/{}/people?utf8=%E2%9C%93&query={}\".format(o.login, login)\n",
|
||||
" print(\"{} has {} as a member: {}\".format(o.login, login, url))\n",
|
||||
" is_collab = True\n",
|
||||
" if is_collaborator(o, login):\n",
|
||||
" url = \"https://github.com/orgs/{}/outside-collaborators?utf8=%E2%9C%93&query={}\".format(o.login, login)\n",
|
||||
" print(\"{} has {} as a collaborator: {}\".format(o.login, login, url))\n",
|
||||
" is_collab = True\n",
|
||||
" if is_collab:\n",
|
||||
" any_perms = True\n",
|
||||
" else:\n",
|
||||
" print(\"No permissions found for {}\".format(login))\n",
|
||||
" return any_perms"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {
|
||||
"hidden": true,
|
||||
"init_cell": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"re_flags = re.MULTILINE | re.IGNORECASE\n",
|
||||
"\n",
|
||||
"def process_from_email(email_body):\n",
|
||||
" # get rid of white space\n",
|
||||
" email_body = os.linesep.join(\n",
|
||||
" [s.strip() for s in email_body.splitlines() if s.strip()]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" user = set()\n",
|
||||
" \n",
|
||||
" # Extract data from internal email format\n",
|
||||
" match = re.search(r'^Full Name: (?P<full_name>\\S.*)$', email_body, re_flags)\n",
|
||||
" user.add(match.group(\"full_name\") if match else None)\n",
|
||||
"\n",
|
||||
" match = re.search(r'^Email: (?P<primary_email>.*)$', email_body, re_flags)\n",
|
||||
" primary_email = match.group(\"primary_email\") if match else None\n",
|
||||
" user.add(primary_email)\n",
|
||||
" print(\"Check these URLs for Heroku activity:\")\n",
|
||||
" print(\" Mozillians: https://mozillians.org/en-US/search/?q={}\".format(primary_email.replace('@', '%40')))\n",
|
||||
" print(\" Heroku: https://dashboard.heroku.com/teams/mozillacorporation/access?filter={}\".format(primary_email.replace('@', '%40')))\n",
|
||||
" print(email_body)\n",
|
||||
"\n",
|
||||
" match = re.search(r'^Github Profile: (?P<github_profile>.*)$', email_body, re_flags)\n",
|
||||
" declared_github = match.group(\"github_profile\") if match else None\n",
|
||||
" user.add(declared_github)\n",
|
||||
"\n",
|
||||
" match = re.search(r'^Zimbra Alias: (?P<other_email>.*)$', email_body, re_flags)\n",
|
||||
" user.add(match.group(\"other_email\") if match else None)\n",
|
||||
"\n",
|
||||
" # we consider each token in the IM line as a possible GitHub login\n",
|
||||
" match = re.search(r'^IM:\\s*(.*)$', email_body, re_flags)\n",
|
||||
" if match:\n",
|
||||
" im_line = match.groups()[0]\n",
|
||||
" matches = re.finditer(r'\\W*((\\w+)(?:\\s+\\w+)*)', im_line)\n",
|
||||
" user.update([x.group(1) for x in matches] if matches else None)\n",
|
||||
"\n",
|
||||
" match = re.search(r'^Bugzilla Email: (?P<bz_email>.*)$', email_body, re_flags)\n",
|
||||
" user.add(match.group(\"bz_email\") if match else None)\n",
|
||||
"\n",
|
||||
" # clean up some noise, case insensitively\n",
|
||||
" # the tokens to ignore are added based on discovery,\n",
|
||||
" # they tend to cause the searches to get rate limited.\n",
|
||||
" user = {x.lower() for x in user if x}\n",
|
||||
" user = user - {None, \"irc\", \"slack\", \"skype\", \"b\", 'hotmail', 'mozilla', 'ro', 'com', 'softvision', 'mail', \n",
|
||||
" 'twitter', 'blog', 'https' }\n",
|
||||
" global displayed_users\n",
|
||||
" displayed_users = set()\n",
|
||||
" print(\"Trying '{}'\".format(\"', '\".join(user)))\n",
|
||||
" guesses = set()\n",
|
||||
" for term in user:\n",
|
||||
" new = show_users(get_user_counts(term))\n",
|
||||
" guesses.update({x.lower() for x in new})\n",
|
||||
" # include declared_github if it exists\n",
|
||||
" if declared_github:\n",
|
||||
" guesses.add(declared_github.lower())\n",
|
||||
" print(\"Checking logins {}\".format(guesses))\n",
|
||||
" found_perms = check_login_perms(guesses)\n",
|
||||
" print(\"Finished all reporting.\")\n",
|
||||
" if declared_github and not found_perms:\n",
|
||||
" # print some text to copy/paste into email\n",
|
||||
" print(\", even for declared login '{}'.\".format(declared_github))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Start of common usage\n",
|
||||
"Usage steps:\n",
|
||||
"1. Fill in a way to load your GitHub personal access token (PAT) in the first code cell\n",
|
||||
"2. Fill in the list of orgs to check in the second code cell\n",
|
||||
"3. For each user:\n",
|
||||
" 1. Copy entire text of email\n",
|
||||
" 2. Paste between the ``\"\"\"`` marks in the cell below.\n",
|
||||
" 3. Execute that cell\n",
|
||||
" \n",
|
||||
"The cell below should have the following text:\n",
|
||||
"```python\n",
|
||||
"process_from_email(r\"\"\"\n",
|
||||
" # paste email body here\n",
|
||||
"\"\"\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Or, if you're not processing an email, fake the two fields 'email:' and 'im:' (the 'email:' line is required; processing fails without it):\n",
|
||||
"```python\n",
|
||||
"process_from_email(r\"\"\"\n",
|
||||
"im: various possible names space separated\n",
|
||||
"email: primary_email@mozilla.com\n",
|
||||
"\"\"\")\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"process_from_email(r\"\"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"\"\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"celltoolbar": "Initialization Cell",
|
||||
"kernelspec": {
|
||||
"display_name": "GitHub3.py",
|
||||
"language": "python",
|
||||
"name": "python2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.15rc1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Загрузка…
Ссылка в новой задаче