mirror of https://github.com/hpcaitech/ColossalAI
[workflow] cover all public repositories in weekly report (#4069)
parent 4a81faa5f3
commit b463651f3e
@@ -1,5 +1,4 @@
 import os
-from dataclasses import dataclass
 from datetime import datetime, timedelta
 from typing import Any, Dict, List
 
@@ -10,8 +9,7 @@ import seaborn
 from requests_toolbelt import MultipartEncoder
 
 
-@dataclass
-class Contributor:
+class Counter(dict):
     """
     Dataclass for a github contributor.
 
@@ -19,8 +17,40 @@ class Contributor:
         name (str): name of the contributor
         num_commits_this_week (int): number of commits made within one week
     """
-    name: str
-    num_commits_this_week: int
+
+    def record(self, item: str):
+        if item in self:
+            self[item] += 1
+        else:
+            self[item] = 1
+
+    def to_sorted_list(self):
+        data = [(key, value) for key, value in self.items()]
+        data.sort(key=lambda x: x[1], reverse=True)
+        return data
+
+
+def get_utc_time_one_week_ago():
+    """
+    Get the UTC time one week ago.
+    """
+    now = datetime.utcnow()
+    start_datetime = now - timedelta(days=7)
+    return start_datetime
+
+
+def datetime2str(dt):
+    """
+    Convert datetime to string in the format of YYYY-MM-DDTHH:MM:SSZ
+    """
+    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def str2datetime(string):
+    """
+    Convert string in the format of YYYY-MM-DDTHH:MM:SSZ to datetime
+    """
+    return datetime.strptime(string, "%Y-%m-%dT%H:%M:%SZ")
+
+
 def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
@@ -36,7 +66,28 @@ def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
     plt.savefig(output_path, dpi=1200)
 
 
-def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
+def get_organization_repositories(github_token, organization_name) -> List[str]:
+    """
+    Retrieve the public repositories under the organization.
+    """
+    url = f"https://api.github.com/orgs/{organization_name}/repos?type=public"
+
+    # prepare header
+    headers = {
+        'Authorization': f'Bearer {github_token}',
+        'Accept': 'application/vnd.github+json',
+        'X-GitHub-Api-Version': '2022-11-28'
+    }
+
+    res = requests.get(url, headers=headers).json()
+    repo_list = []
+
+    for item in res:
+        repo_list.append(item['name'])
+    return repo_list
+
+
+def get_issue_pull_request_comments(github_token: str, org_name: str, repo_name: str, since: str) -> Dict[str, int]:
     """
     Retrieve the issue/PR comments made by our members in the last 7 days.
 
@@ -56,7 +107,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
     # do pagination to the API
     page = 1
     while True:
-        comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}'
+        comment_api = f'https://api.github.com/repos/{org_name}/{repo_name}/issues/comments?since={since}&page={page}'
         comment_response = requests.get(comment_api, headers=headers).json()
 
         if len(comment_response) == 0:
@@ -70,7 +121,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
                 continue
 
             issue_id = item['issue_url'].split('/')[-1]
-            issue_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/{issue_id}'
+            issue_api = f'https://api.github.com/repos/{org_name}/{repo_name}/issues/{issue_id}'
             issue_response = requests.get(issue_api, headers=headers).json()
             issue_author_relationship = issue_response['author_association']
 
@@ -87,7 +138,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
     return user_engagement_count
 
 
-def get_discussion_comments(github_token, since) -> Dict[str, int]:
+def get_discussion_comments(github_token: str, org_name: str, repo_name: str, since: str) -> Dict[str, int]:
     """
     Retrieve the discussion comments made by our members in the last 7 days.
     This is only available via the GitHub GraphQL API.
@@ -105,7 +156,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
             offset_str = f", after: \"{cursor}\""
         query = f"""
         {{
-            repository(owner: "hpcaitech", name: "ColossalAI"){{
+            repository(owner: "{org_name}", name: "{repo_name}"){{
                 discussions(first: {num} {offset_str}){{
                     edges {{
                         cursor
@@ -134,7 +185,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
             offset_str = f", before: \"{cursor}\""
         query = f"""
         {{
-            repository(owner: "hpcaitech", name: "ColossalAI"){{
+            repository(owner: "{org_name}", name: "{repo_name}"){{
                 discussion(number: {discussion_number}){{
                     title
                     comments(last: {num} {offset_str}){{
@@ -191,8 +242,8 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
         for edge in edges:
             # print the discussion title
             discussion = edge['node']
+            discussion_updated_at = str2datetime(discussion['updatedAt'])
 
-            discussion_updated_at = datetime.strptime(discussion['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
             # check if the updatedAt is within the last 7 days
             # if yes, add it to discussion_numbers
             if discussion_updated_at > since:
@@ -250,6 +301,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
                     if reply['authorAssociation'] == 'MEMBER':
                         # check if the updatedAt is within the last 7 days
                         # if yes, add it to discussion_numbers
+
                         reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
                         if reply_updated_at > since:
                             member_name = reply['author']['login']
@@ -260,7 +312,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
     return user_engagement_count
 
 
-def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
+def generate_user_engagement_leaderboard_image(github_token: str, org_name: str, repo_list: List[str], output_path: str) -> bool:
     """
     Generate the user engagement leaderboard image for stats within the last 7 days
 
@@ -270,23 +322,29 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
     """
 
     # request to the Github API to get the users who have replied the most in the last 7 days
-    now = datetime.utcnow()
-    start_datetime = now - timedelta(days=7)
-    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
+    start_datetime = get_utc_time_one_week_ago()
+    start_datetime_str = datetime2str(start_datetime)
 
     # get the issue/PR comments and discussion comment count
-    issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str)
-    discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime)
     total_engagement_count = {}
 
-    # update the total engagement count
-    total_engagement_count.update(issue_pr_engagement_count)
-    for name, count in discussion_engagement_count.items():
-        if name in total_engagement_count:
-            total_engagement_count[name] += count
-        else:
-            total_engagement_count[name] = count
+    def _update_count(counter):
+        for name, count in counter.items():
+            if name in total_engagement_count:
+                total_engagement_count[name] += count
+            else:
+                total_engagement_count[name] = count
+
+
+    for repo_name in repo_list:
+        print(f"Fetching user engagement count for {repo_name}/{repo_name}")
+        issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime_str)
+        discussion_engagement_count = get_discussion_comments(github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime)
+
+        # update the total engagement count
+        _update_count(issue_pr_engagement_count)
+        _update_count(discussion_engagement_count)
 
     # prepare the data for plotting
     x = []
     y = []
@@ -302,9 +360,6 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
             x.append(count)
             y.append(name)
 
-    # use Shanghai time to display on the image
-    start_datetime_str = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%dT%H:%M:%SZ")
-
     # plot the leaderboard
     xlabel = f"Number of Comments made (since {start_datetime_str})"
     ylabel = "Member"
@@ -315,7 +370,7 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
         return False
 
 
-def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
+def generate_contributor_leaderboard_image(github_token, org_name, repo_list, output_path) -> bool:
     """
     Generate the contributor leaderboard image for stats within the last 7 days
 
@@ -324,54 +379,81 @@ def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
         output_path (str): the path to save the image
     """
     # request to the Github API to get the users who have contributed in the last 7 days
-    URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors'
     headers = {
         'Authorization': f'Bearer {github_token}',
         'Accept': 'application/vnd.github+json',
         'X-GitHub-Api-Version': '2022-11-28'
     }
 
-    while True:
-        response = requests.get(URL, headers=headers).json()
+    counter = Counter()
+    start_datetime = get_utc_time_one_week_ago()
 
-        if len(response) != 0:
-            # sometimes the Github API returns empty response for unknown reason
-            # request again if the response is empty
-            break
+    def _get_url(org_name, repo_name, page):
+        return f'https://api.github.com/repos/{org_name}/{repo_name}/pulls?per_page=50&page={page}&state=closed'
 
-    contributor_list = []
+    def _iterate_by_page(org_name, repo_name):
+        page = 1
+        stop = False
 
-    # get number of commits for each contributor
-    start_timestamp = None
-    for item in response:
-        num_commits_this_week = item['weeks'][-1]['c']
-        name = item['author']['login']
-        contributor = Contributor(name=name, num_commits_this_week=num_commits_this_week)
-        contributor_list.append(contributor)
+        while not stop:
+            print(f"Fetching pull request data for {org_name}/{repo_name} - page{page}")
+            url = _get_url(org_name, repo_name, page)
 
-        # update start_timestamp
-        start_timestamp = item['weeks'][-1]['w']
+            while True:
+                response = requests.get(url, headers=headers).json()
+
+                if isinstance(response, list):
+                    # sometimes the Github API returns nothing
+                    # request again if the response is not a list
+                    break
+                print("Empty response, request again...")
+
+            if len(response) == 0:
+                # if the response is empty, stop
+                stop = True
+                break
+
+            # count the pull request and author from response
+            for pr_data in response:
+                merged_at = pr_data['merged_at']
+                author = pr_data['user']['login']
+
+                if merged_at is None:
+                    continue
+
+                merge_datetime = str2datetime(merged_at)
+
+                if merge_datetime < start_datetime:
+                    # if we found a pull request that is merged before the start_datetime
+                    # we stop
+                    stop = True
+                    break
+                else:
+                    # record the author1
+                    counter.record(author)
+
+            # next page
+            page += 1
+
+    for repo_name in repo_list:
+        _iterate_by_page(org_name, repo_name)
 
     # convert unix timestamp to Beijing datetime
-    start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai'))
-    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
+    bj_start_datetime = datetime.fromtimestamp(start_datetime.timestamp(), tz=pytz.timezone('Asia/Shanghai'))
+    bj_start_datetime_str = datetime2str(bj_start_datetime)
 
     # sort by number of commits
-    contributor_list.sort(key=lambda x: x.num_commits_this_week, reverse=True)
+    contribution_list = counter.to_sorted_list()
 
-    # remove contributors who has zero commits
-    contributor_list = [x for x in contributor_list if x.num_commits_this_week > 0]
-
     # prepare the data for plotting
-    x = [x.num_commits_this_week for x in contributor_list]
-    y = [x.name for x in contributor_list]
+    author_list = [x[0] for x in contribution_list]
+    num_commit_list = [x[1] for x in contribution_list]
 
     # plot
-    if len(x) > 0:
-        xlabel = f"Number of Commits (since {start_datetime_str})"
+    if len(author_list) > 0:
+        xlabel = f"Number of Pull Requests (since {bj_start_datetime_str})"
         ylabel = "Contributor"
         title = 'Active Contributor Leaderboard'
-        plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
+        plot_bar_chart(num_commit_list, author_list, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
         return True
     else:
         return False
@@ -438,10 +520,14 @@ if __name__ == '__main__':
     GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
     CONTRIBUTOR_IMAGE_PATH = 'contributor_leaderboard.png'
     USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'
+    ORG_NAME = "hpcaitech"
 
+    # get all open source repositories
+    REPO_LIST = get_organization_repositories(GITHUB_TOKEN, ORG_NAME)
+
     # generate images
-    contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
-    engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
+    contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, ORG_NAME, REPO_LIST, CONTRIBUTOR_IMAGE_PATH)
+    engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, ORG_NAME, REPO_LIST, USER_ENGAGEMENT_IMAGE_PATH)
 
     # upload images
     APP_ID = os.environ['LARK_APP_ID']
@@ -457,8 +543,8 @@ if __name__ == '__main__':
 2. 用户互动榜单
 
 注:
-- 开发贡献者测评标准为:本周由公司成员提交的commit次数
-- 用户互动榜单测评标准为:本周由公司成员在非成员创建的issue/PR/discussion中回复的次数
+- 开发贡献者测评标准为:本周由公司成员与社区在所有开源仓库提交的Pull Request次数
+- 用户互动榜单测评标准为:本周由公司成员在非成员在所有开源仓库创建的issue/PR/discussion中回复的次数
 """
 
     send_message_to_lark(message, LARK_WEBHOOK_URL)
@@ -467,7 +553,7 @@ if __name__ == '__main__':
     if contrib_success:
        send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
     else:
-        send_message_to_lark("本周没有成员贡献commit,无榜单图片生成。", LARK_WEBHOOK_URL)
+        send_message_to_lark("本周没有成员贡献PR,无榜单图片生成。", LARK_WEBHOOK_URL)
 
     # send user engagement image to lark
     if engagement_success:
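For quick reference, here is a small, self-contained sketch (not part of the commit) of how the Counter helper introduced above tallies pull request authors and orders the leaderboard; the author names are made up for illustration.

from typing import List, Tuple


class Counter(dict):
    # Same helper as in the diff above: tally how many times a key is recorded.
    def record(self, item: str):
        if item in self:
            self[item] += 1
        else:
            self[item] = 1

    def to_sorted_list(self) -> List[Tuple[str, int]]:
        # Return (key, count) pairs sorted by count, highest first.
        data = [(key, value) for key, value in self.items()]
        data.sort(key=lambda x: x[1], reverse=True)
        return data


if __name__ == "__main__":
    counter = Counter()
    for author in ["alice", "bob", "alice"]:  # hypothetical PR authors
        counter.record(author)
    print(counter.to_sorted_list())  # [('alice', 2), ('bob', 1)]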