Commit 21c316e6 authored by Jason K's avatar Jason K Committed by ace

GitHub Enterprise migration example code.

Also includes Active Directory/LDAP sync code and automated hook
addition.
parent d6a08baf
# Github Enterprise Migration and Gitlab Active Directory Sync Example
This directory contains example code that was used to migrate an installation of Github Enterprise to Gitlab. It also has a script created to periodically sync LDAP/Active Directory users with Gitlab and assign permissions based on group membership. Finally, a simple script to add a custom hook to all group repositories is included.
All code relies solely on the Github and Gitlab APIs for interaction (octokit, gitlab, git, and net/ldap gems). All configuration is done in the top level scripts (migrate.rb, update_gitlab.rb, and update_hooks.rb). All LDAP, Github, and Gitlab specific code is fairly generic and decoupled in classes located in corresponding files under ./jk.
The Github migration should be run as a user that has admin access to all organizations and repositories you wish to migrate. The Gitlab code should also run as an admin user.
Of course, this code will likely require heavy modification to suit individual needs, but it should serve as a decent example.
#!/opt/gitlab/embedded/bin/ruby
# require 'openssl'
require './jk/ad'
require './jk/githubz'
require './jk/gitlabz'
# ruby lets us redefine constants, so we should do it. right?
# OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE
require 'net/ldap'
module Jk
  # Read-only helper around Net::LDAP used to resolve Active Directory users
  # and group memberships for the Github -> Gitlab migration and sync scripts.
  class Ad
    # Port used when the caller does not pass one explicitly.
    DEFAULT_PORT = 389

    # Matches the leading CN component of a distinguished name. Backslash
    # escapes are accepted inside the value, so "CN=Doe\, John,OU=..." yields
    # "Doe\, John" rather than stopping at the escaped comma.
    LEADING_CN = /\ACN=((?:\\.|[^,\\])+)/

    # host     - LDAP/AD server host name
    # username - bind user for simple authentication
    # password - bind password
    # base     - search base (tree base) used for every query
    # port     - LDAP port; defaults to DEFAULT_PORT (389)
    def initialize(host, username, password, base, port = DEFAULT_PORT)
      @ldap = Net::LDAP.new(
        :host => host,
        :port => port,
        :auth => {
          :method => :simple,
          :username => username,
          :password => password
        }
      )
      @treebase = base
    end

    # Looks up entries where +search_term+ equals +user_name+ and returns a
    # hash of attribute name => value for samaccountname, title, displayname
    # and memberOf. A multi-valued attribute collapses to its last value, and
    # later entries overwrite earlier ones. Returns {} when nothing matches.
    def get_user_login_from_cn(user_name, search_term = "cn")
      attrs = ['samaccountname', 'title', 'displayname', 'memberOf']
      user_hash = {}
      filter = Net::LDAP::Filter.eq(search_term, user_name)
      @ldap.search(:base => @treebase, :filter => filter,
                   :attributes => attrs) do |entry|
        entry.each do |key, vals|
          user_hash[key] = vals.last unless vals.empty?
        end
      end
      user_hash
    end

    # Returns the DN (a value starting with "CN=") of the entry whose
    # +search_term+ equals +user_name+. When no such entry is found the
    # +user_name+ that was passed in is returned unchanged.
    def get_user_cn_from_login(user_name, search_term = "samaccountname")
      resolved = user_name
      filter = Net::LDAP::Filter.eq(search_term, user_name)
      @ldap.search(:base => @treebase, :filter => filter,
                   :attributes => ['dn']) do |entry|
        entry[:dn].each do |value|
          resolved = value if value =~ /\ACN=.+/
        end
      end
      resolved
    end

    # Returns a hash of member DN => user attribute hash (see
    # get_user_login_from_cn) for every member of the group named
    # +group_name+. Each member costs one additional LDAP search.
    def get_ldap_group_members_info_hash(group_name)
      users_hash = {}
      get_ldap_group_members_cn_array(group_name).each do |member_dn|
        users_hash[member_dn] = get_user_login_from_cn(leading_cn(member_dn))
      end
      users_hash
    end

    # Returns the DNs of the members of the group named +group_name+.
    # Only member values that start with a CN component are kept. The result
    # is empty when the group does not exist or has no such members.
    def get_ldap_group_members_cn_array(group_name)
      member_dns = []
      filter = Net::LDAP::Filter.eq("cn", group_name)
      @ldap.search(:base => @treebase, :filter => filter,
                   :attributes => ['member']) do |entry|
        entry[:member].each do |value|
          member_dns.push(value) if value =~ LEADING_CN
        end
      end
      member_dns
    end

    # Pass in a hash of org_name => teams, a hash of team_name => members and
    # the name of the LDAP group whose members should exist in Gitlab.
    # Gitlab has no concept of teams so we'll just grant them access directly
    # to each gitlab group (github orgs == gitlab groups)
    #
    # From github, the team hash => member name array is in form of github logins
    # eg "sbody" or whatever
    # What we want to end up with is team_name => array of user_ldap_cns
    # eg "CN=Some Body,OU=Users,OU=ad,DC=testers,DC=com"
    #
    # The reason we want a key of CN is due to querying ldap membership returning
    # only CNs. We want a quick lookup on that.
    #
    # Returns [orgs_hash, groups_hash, members_info_hash]. +orgs_hash+ is passed
    # through untouched; +groups_hash+ is updated in place.
    def get_membership_hashes(orgs_hash, groups_hash, ldap_github_group)
      @ad_groups = groups_hash
      @orgs = orgs_hash
      github_ad_members = get_ldap_group_members_info_hash(ldap_github_group)
      @ad_groups.keys.each do |ad_group_name|
        ad_member_dns = get_ldap_group_members_cn_array(ad_group_name)
        @ad_groups[ad_group_name] =
          if ad_member_dns.empty?
            # no corresponding ad group: replace the user names with CNs
            # eg: "sbody" becomes "CN=Some Body,OU=Users,OU=testers,DC=com"
            @ad_groups[ad_group_name].map { |login| get_user_cn_from_login(login) }
          else
            # an ad group exists: its members win over whatever was already
            # in the hash (likely github logins rather than CNs)
            ad_member_dns
          end
      end
      [@orgs, @ad_groups, github_ad_members]
    end

    private

    # Extracts the value of the leading CN component of +dn+ with backslash
    # escapes removed ("CN=Doe\, John,OU=x" => "Doe, John"), or nil when +dn+
    # does not start with a CN component.
    def leading_cn(dn)
      match = LEADING_CN.match(dn)
      match && match[1].gsub(/\\(.)/, '\1')
    end
  end
end
require 'set'
require 'octokit'
module Jk
  # Read-only helper around Octokit::Client that gathers the organizations,
  # repositories, teams, ssh keys and pull requests to be migrated from a
  # Github (Enterprise) installation.
  class Githubz
    # login/password  - credentials of the Github user to act as
    # api_endpoint    - base URL of the Github API
    # web_endpoint    - base URL of the Github web UI
    def initialize(login, password, api_endpoint, web_endpoint)
      @gh_client = Octokit::Client.new(
        :login => login,
        :password => password,
        :api_endpoint => api_endpoint,
        :web_endpoint => web_endpoint
      )
      @per_page = 100
    end

    # Returns a hash of org login => array of repository names.
    def get_org_repo_hash
      org_repo_hash = {}
      all_organizations.each do |org|
        repos = fetch_all_pages { |paging| @gh_client.repositories(org.login, paging) }
        org_repo_hash[org.login] = repos.map { |repo| repo.name }
      end
      org_repo_hash
    end

    # Returns two hashes:
    #   org login (Symbol) => array of team names
    #   team name (Symbol) => array of unique, downcased member logins
    # Every org has a team called "Owners", so that one is prefixed with the
    # org login ("org.Owners") to keep the names distinct. Teams that share a
    # name across orgs have their members merged.
    def get_org_teams
      org_teams = {}
      team_members = {}
      all_organizations.each do |org|
        org_key = org.login.to_sym
        org_teams[org_key] = []
        teams = fetch_all_pages { |paging| @gh_client.organization_teams(org.login, paging) }
        teams.each do |team|
          team_name = team.name == 'Owners' ? org.login + '.' + team.name : team.name
          org_teams[org_key].push(team_name)
          logins = (team_members[team_name.to_sym] ||= [])
          members = fetch_all_pages { |paging| @gh_client.team_members(team.id, paging) }
          members.each { |member| logins.push(member.login.downcase) }
        end
      end
      # get rid of dupes
      team_members.each_value { |logins| logins.uniq! }
      return org_teams, team_members
    end

    # Returns a hash of downcased user login => array of public ssh keys.
    # The user with id 1 and every account whose type is not 'User' are
    # skipped.
    def get_user_key_hash
      users = []
      last_id_seen = 0
      # all_users is paged by "since" id; stop on an empty batch or when the
      # last id no longer advances
      loop do
        batch = @gh_client.all_users(:since => last_id_seen)
        break if batch.empty? || batch.last.id == last_id_seen
        last_id_seen = batch.last.id
        users.concat(batch)
      end
      user_keys = {}
      users.each do |user|
        next if user.id == 1 || user.type != 'User'
        user_login = user.login.downcase
        keys = fetch_all_pages { |paging| @gh_client.user_keys(user_login, paging) }
        user_keys[user_login] = keys.map { |key| key.key }
      end
      user_keys
    end

    # return array of hashes containing pull request number, title, body,
    # commits, and comments, ordered by pull request number. Returns [] (after
    # logging) when the pull requests cannot be listed.
    def get_pull_requests_comments_for_repo(repo_full_name)
      begin
        prs = ['closed', 'open'].flat_map do |state|
          fetch_pull_requests(repo_full_name, state)
        end
      rescue StandardError => e
        puts("Unable to get pull requests for #{repo_full_name} #{e}")
        return []
      end
      prs.sort_by! { |pr| pr.number }
      puts("#{repo_full_name} has no pull requests") if prs.empty?
      return_array = []
      cnt = 1
      prs.each do |pr|
        # if we don't have contiguous pull request numbers we'll create
        # a dummy to keep our pull request numbers equal from github to gitlab
        # this happens if an issue was created in github which was not
        # an actual pull request. We're only migrating pull requests here.
        while pr.number > cnt
          puts("making dummy #{pr.number} #{cnt}")
          return_array.push(dummy_pull_hash(cnt))
          cnt += 1
        end
        puts("getting #{repo_full_name}##{pr.number} #{cnt}")
        cnt += 1
        return_array.push(
          :number => pr.number,
          :title => pr.title,
          :body => pr.body,
          :commits => commit_hashes(repo_full_name, pr.number),
          :comments => comment_hashes(repo_full_name, pr.number)
        )
      end
      return_array
    end

    private

    # Collects every page of a paginated listing. The block receives the
    # paging options (:per_page, :page) and must return one page of results;
    # a page shorter than @per_page ends the walk.
    def fetch_all_pages
      items = []
      page = 1
      loop do
        batch = yield(:per_page => @per_page, :page => page)
        items.concat(batch)
        break if batch.size < @per_page
        page += 1
      end
      items
    end

    # Organizations of the authenticated user. The user argument is passed
    # as nil explicitly so the paging options land in the options position.
    def all_organizations
      fetch_all_pages { |paging| @gh_client.organizations(nil, paging) }
    end

    # All pull requests of +repo_full_name+ in the given +state+.
    def fetch_pull_requests(repo_full_name, state)
      fetch_all_pages do |paging|
        @gh_client.pull_requests(repo_full_name, paging.merge(:state => state))
      end
    end

    # Placeholder entry for an issue number that is not a pull request.
    def dummy_pull_hash(number)
      { :number => number, :title => "Dummy issue", :body => "",
        :commits => [], :comments => [] }
    end

    # Commits of one pull request as hashes of username (author email up to
    # the last '@'), abbreviated sha, date and message.
    def commit_hashes(repo_full_name, number)
      commits = fetch_all_pages do |paging|
        @gh_client.pull_request_commits(repo_full_name, number, paging)
      end
      commits.map do |c|
        email = c.commit.author.email.to_s
        {
          :username => email[0, (email.rindex('@') || email.length)],
          :sha => c.sha[0, 7],
          :date => c.commit.author.date,
          :message => c.commit.message
        }
      end
    end

    # Review comments followed by issue comments of one pull request as
    # hashes of username, date and body.
    def comment_hashes(repo_full_name, number)
      comments = fetch_all_pages do |paging|
        @gh_client.pull_request_comments(repo_full_name, number, paging)
      end
      comments.concat(
        fetch_all_pages do |paging|
          @gh_client.issue_comments(repo_full_name, number, paging)
        end
      )
      comments.map do |c|
        { :username => c.user.login, :date => c.created_at, :body => c.body }
      end
    end
  end
end
require 'set'
require 'cgi'
require 'gitlab'
class Gitlab::Client
  # for some reason these don't exist in the gem api impl. :(
  # so we'll add our own defs for the missing api implementations
  module Users
    # POSTs a new ssh key (+title+, +key+) for the user with id +user_id+.
    def create_key_for_user_id(user_id, title, key)
      payload = { :title => title, :key => key }
      post("/users/#{user_id}/keys", :body => payload)
    end
  end

  module Projects
    # PUTs +options+ as the new attributes of the project with id +project_id+.
    def edit_project(project_id, options={})
      project_path = "/projects/#{project_id}"
      put(project_path, :body => options)
    end
  end
end
module Jk
  # Helper around Gitlab::Client that creates users, groups, projects, keys
  # and hooks, and caches the id lookups it needs along the way.
  class Gitlabz
    # api_endpoint - base URL of the Gitlab API
    # admin_token  - private token of the user to act as
    def initialize(api_endpoint, admin_token)
      @gl_client = Gitlab::Client.new(
        :endpoint => api_endpoint,
        :private_token => admin_token
      )
      @user_cn_hash = {}
      @user_id_hash = {}
      @group_id_hash = {}
      @proj_name_id_hash = {}
      @per_page = 100
    end

    # Adds an ssh key to the user named +user_name+. Does nothing when the
    # user is unknown.
    def add_key_for_user(user_name, key_name, key)
      user_id = get_users_id_hash[user_name]
      return unless user_id
      begin
        @gl_client.create_key_for_user_id(user_id, key_name, key)
      rescue Gitlab::Error::BadRequest
        # if this key exists we don't care
      end
    end

    # Returns a hash of downcased username => user id. The result is cached
    # once it is non-empty.
    def get_users_id_hash
      return @user_id_hash unless @user_id_hash.empty?
      fetch_all_pages { |paging| @gl_client.users(paging) }.each do |user|
        @user_id_hash[user.username.downcase] = user.id
      end
      @user_id_hash
    end

    # Returns a hash of extern_uid (taken from each user's first identity)
    # => user id. Users without identities are left out. The result is cached
    # once it is non-empty; pass force_update = true to query again.
    def get_users_cn_hash(force_update = false)
      return @user_cn_hash if !@user_cn_hash.empty? && !force_update
      fetch_all_pages { |paging| @gl_client.users(paging) }.each do |user|
        next if user.identities.empty?
        @user_cn_hash[user.identities[0]["extern_uid"]] = user.id
      end
      @user_cn_hash
    end

    # Returns true when +user_id+ is a member of the group +group_id+.
    # The member list of each group is fetched once and then cached.
    def is_group_member(group_id, user_id)
      @group_members_hash ||= {}
      @group_members_hash[group_id] ||= get_group_members(group_id)
      @group_members_hash[group_id].include?(user_id)
    end

    # Returns a Set with the ids of all members of the group +group_id+.
    def get_group_members(group_id)
      members = fetch_all_pages { |paging| @gl_client.group_members(group_id, paging) }
      Set.new(members.map { |member| member.id })
    end

    # Returns a hash of group name => group id. The result is cached once it
    # is non-empty.
    def get_groups_hash
      return @group_id_hash unless @group_id_hash.empty?
      fetch_all_pages { |paging| @gl_client.groups(paging) }.each do |group|
        @group_id_hash[group.name] = group.id
      end
      @group_id_hash
    end

    # Adds +user_id+ to +group_id+ with access level +perm+ (default "30").
    # Failures are printed and otherwise ignored.
    def add_dev_to_group(user_id, group_id, perm="30")
      begin
        @gl_client.add_group_member(group_id, user_id, perm)
      rescue StandardError => e
        puts(e)
      end
    end

    # Creates an ldap-backed user unless one with the same (downcased)
    # username is already known, and returns the user's id.
    def add_user(email, password, username, name, ldap_cn, bio)
      username = username.downcase
      unless get_users_id_hash[username]
        user = @gl_client.create_user(
          email,
          password,
          :username => username,
          :name => name,
          :provider => "ldap",
          :extern_uid => ldap_cn,
          :bio => bio,
          :confirm => 0
        )
        # cache under the same downcased key that the lookup below uses
        get_users_id_hash[username] = user.id
      end
      get_users_id_hash[username]
    end

    # Returns the id of the group +org_name+, creating the group (name and
    # path both +org_name+) when the lookup reports it as not found.
    def create_or_get_group_id(org_name)
      begin
        group = @gl_client.group(org_name)
      rescue Gitlab::Error::NotFound
        group = @gl_client.create_group(org_name, org_name)
        @group_id_hash[group.name] = group.id
      end
      group.id
    end

    # Creates project +repo_name+ in the group +org_name+ (creating the group
    # if needed) and returns the new project's id.
    def create_and_get_project_id(repo_name, org_name)
      new_repo_name = repo_name.gsub('.', '_')
      puts("Creating #{org_name}/#{new_repo_name}")
      group_id = create_or_get_group_id(org_name)
      project = @gl_client.create_project(
        new_repo_name,
        :namespace_id => group_id
      )
      # gitlab won't allow us to create repos that are Camel Cased or have
      # dots in the name (super cool) but it will let me rename them
      # once I create them (makes total sense)
      @gl_client.edit_project(project.id, :name => repo_name, :path => repo_name)
      project.id
    end

    # Renames project +curr_name+ in +org_name+ to +new_name+ (name and path)
    # and returns its id. Prints a message when the project is not found.
    def edit_project_name(org_name, new_name, curr_name)
      begin
        project = @gl_client.project(CGI.escape(org_name + "/" + curr_name))
        @gl_client.edit_project(project.id, :name => new_name, :path => new_name)
        return project.id
      rescue Gitlab::Error::NotFound
        puts("#{curr_name} not found")
      end
    end

    # Returns a hash of path_with_namespace => project id. The result is
    # cached once it is non-empty.
    def get_project_name_id_hash
      return @proj_name_id_hash unless @proj_name_id_hash.empty?
      fetch_all_pages { |paging| @gl_client.projects(paging) }.each do |proj|
        @proj_name_id_hash[proj.path_with_namespace] = proj.id
      end
      @proj_name_id_hash
    end

    # Creates an issue with +title+ and +body+ in the project whose
    # path_with_namespace is +repo_full_name+, then closes it right away.
    def create_issue(repo_full_name, title, body)
      project_id = get_project_name_id_hash[repo_full_name]
      gl_issue = @gl_client.create_issue(project_id, title, :description => body)
      @gl_client.close_issue(project_id, gl_issue.id)
    end

    # Unprotects every branch of the project +project_id+.
    def unprotect_branches_for_project(project_id)
      @gl_client.branches(project_id).each do |branch|
        puts("Unprotecting #{project_id} #{branch.name}")
        @gl_client.unprotect_branch(project_id, branch.name)
      end
    end

    # Adds a push-only hook for +url+ to the project +proj_id+ unless a hook
    # with that url is already present.
    def add_hook_to_proj(proj_id, url)
      hooks = @gl_client.project_hooks(proj_id, :per_page => @per_page)
      return if hooks.any? { |hook| hook.url == url }
      puts("Adding #{url} to proj id #{proj_id}")
      @gl_client.add_project_hook(
        proj_id,
        url,
        :push_events => 1,
        :issues_events => 0,
        # merge_requests_events is the option name the Gitlab hooks API
        # defines
        :merge_requests_events => 0,
        :tag_push_events => 0
      )
    end

    private

    # Collects every page of a paginated listing. The block receives the
    # paging options (:per_page, :page) and must return one page of results;
    # a page shorter than @per_page ends the walk.
    def fetch_all_pages
      items = []
      page = 1
      loop do
        batch = yield(:per_page => @per_page, :page => page)
        items.concat(batch)
        break if batch.size < @per_page
        page += 1
      end
      items
    end
  end
end
#!/opt/gitlab/embedded/bin/ruby
require './jk'
require 'git'
# Migration script for Github -> Gitlab
def do_migration
my_domain = "your.domain"
gh_ssh_url_base="git@github.your.domain"
gl_ssh_url_base="git@gitlab.your.domain"
tmp_git_dir="/git-data/tmp"
hook_url = "https://your.domain/jira/gitlab_hook"
ldap_github_group = "GithubUsersGroup"
ad_username = "user@your.domain"
ad_password = "xxxxxxxxxxxxxxxxxxxxxx"
ad_base = "dc=your,dc=domain"
ad_host = "your.ad.host"
gh_login = "github_user",
gh_password = "xxxxxxxxxxxxxxxxxxxxxx"
gh_api_endpoint = "https://github.your.domain/api/v3/"
gh_web_endpoint = "https://github.your.domain/"
gl_api_endpoint = 'https://gitlab.your.domain/api/v3'
gl_admin_token = 'xxxxxxxxxxxxxxxxxxxx'
gh = Jk::Githubz.new(gh_login, gh_password, gh_api_endpoint, gh_web_endpoint)
gl = Jk::Gitlabz.new(gl_api_endpoint, gl_admin_token)
org_teams_hash,teams_members_hash = gh.get_org_teams
ad = Jk::Ad.new(ad_host, ad_username, ad_password, ad_base)
org_teams_hash,teams_members_hash,cn_user_info_hash =
ad.get_membership_hashes(org_teams_hash, teams_members_hash, ldap_github_group)
# create users from our github group in ldap
cn_user_info_hash.each { |cn, user_info_hash|
#"CN=Some Body,OU=Users,OU=ad,DC=testers,DC=com" => {
# :dn=>"CN=Some Body,OU=Users,OU=ad,DC=testers,DC=com",
# :title=>"Senior Wizard",
# :displayname=>"Some Body",
# :memberof=>"CN=.All Users,OU=Distribution Lists,OU=Users,DC=testers,DC=com",
# :samaccountname=>"SBody"
# }
puts("Adding #{user_info_hash[:displayname]}")
gl.add_user(
"#{user_info_hash[:samaccountname].downcase}@#{my_domain}",
"2689009d91eb2837804a9ca1c598c461", # password doesn't matter for ldap
user_info_hash[:samaccountname].downcase,
user_info_hash[:displayname],
cn,
"#{user_info_hash[:title]}"
)
}
# get user keys from github and store them in gitlab per user
user_key_hash = gh.get_user_key_hash
user_key_hash.each { |username, keys|
cnt = 1
keys.each { |key|
puts("adding #{username} Key import #{cnt} #{key}")
gl.add_key_for_user(username, "Key import #{cnt}", key)
cnt += 1
}
}
# create groups (org) and projects (repos)
gh_org_repos = gh.get_org_repo_hash
gh_org_repos.each { |gh_org_name, gh_repo_name_arr|
gl_org_id = gl.create_or_get_group_id(gh_org_name)
gh_repo_name_arr.each { |gh_repo_name|
gl_proj_id = gl.create_and_get_project_id(gh_repo_name, gh_org_name)
}
}
# take github org teams and apply the perms to gitlab
# gitlab has no team concept so we'll give either owner or dev
# privs to each user based on their github privs.
org_teams_hash.each { |gh_org_name, gh_teams|
gh_teams.each { |gh_team|
teams_members_hash[gh_team.to_sym].each { |user_cn|
gl_org_id = gl.create_or_get_group_id(gh_org_name)
user_hash = cn_user_info_hash[user_cn]
if user_hash
user_id = user_hash[:samaccountname].downcase
if (gh_team =~ /Owners$/)
puts("Adding OWNER user #{user_id} to org #{gh_org_name} with org id #{gl_org_id} for team #{gh_team}")
perm = 50
else
puts("Adding user #{user_id} to org #{gh_org_name} with org id #{gl_org_id} for team #{gh_team}")
perm = 30
end
if (gl.get_users_id_hash()[user_id])
gl.add_dev_to_group(gl.get_users_id_hash()[user_id], gl_org_id, perm)
else
puts("Can't find #{user_id}")
end
end
}
}
}
# time to clone git repos
gh_org_repos = gh.get_org_repo_hash
gh_org_repos.each { |gh_org_name, gh_repo_name_arr|
gh_repo_name_arr.each { |gh_repo_name|
repo_name = gh_repo_name
gh_ssh_url = "#{gh_ssh_url_base}:#{gh_org_name}/#{gh_repo_name}.git"
local_repo_name = "#{repo_name}.git"
puts("clone #{gh_ssh_url} to #{gh_org_name}/#{local_repo_name}")
FileUtils.mkdir_p("#{tmp_git_dir}/#{gh_org_name}")
if File.directory?("#{tmp_git_dir}/#{gh_org_name}/#{local_repo_name}")
git_repo = Git.bare("#{tmp_git_dir}/#{gh_org_name}/#{local_repo_name}")
git_repo.fetch
else
git_repo = Git.clone(gh_ssh_url, "#{local_repo_name}", :path => "#{tmp_git_dir}/#{gh_org_name}", :bare => 1)
git_repo.add_remote("gitlab", "#{gl_ssh_url_base}:#{gh_org_name}/#{local_repo_name}")
end