mirror of https://github.com/hpcaitech/ColossalAI
[workflow] added discussion stats to community report (#2572)
* [workflow] added discussion stats to community report * polish code
pull/2585/head
parent
b0c29d1b4c
commit
8af5a0799b
|
@ -25,5 +25,5 @@ jobs:
|
|||
env:
|
||||
LARK_APP_ID: ${{ secrets.LARK_LEADERBOARD_APP_ID }}
|
||||
LARK_APP_SECRET: ${{ secrets.LARK_LEADERBOARD_APP_SECRET }}
|
||||
LARK_WEBHOOK_URL: ${{ secrets.LARK_LEADERBOARD_WEBHOOK_URL }}
|
||||
LARK_WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import pytz
|
||||
|
@ -11,16 +12,38 @@ from requests_toolbelt import MultipartEncoder
|
|||
|
||||
@dataclass
class Contributor:
    """
    Dataclass for a github contributor.

    Args:
        name (str): name of the contributor
        num_commits_this_week (int): number of commits made within one week
    """
    # GitHub login of the contributor
    name: str
    # commit count reported for the most recent week
    num_commits_this_week: int
|
||||
|
||||
|
||||
def generate_user_engagement_leaderboard_image(github_token, output_path):
|
||||
# request to the Github API to get the users who have replied the most in the last 7 days
|
||||
now = datetime.utcnow()
|
||||
start_datetime = now - timedelta(days=7)
|
||||
start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
    """
    This function is a utility to plot the bar charts and save them as an image.

    Args:
        x (List[Any]): values passed to seaborn.barplot as ``x``
        y (List[Any]): values passed to seaborn.barplot as ``y``
        xlabel (str): label of the x axis
        ylabel (str): label of the y axis
        title (str): title of the chart
        output_path (str): the path to save the image
    """
    # clear the current figure first, this function is called once per leaderboard
    plt.clf()

    # seaborn.barplot returns the matplotlib Axes it drew on
    axes = seaborn.barplot(x=x, y=y)
    axes.set(xlabel=xlabel, ylabel=ylabel, title=title)
    seaborn.despine()
    plt.tight_layout()
    plt.savefig(output_path, dpi=1200)
|
||||
|
||||
|
||||
def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
|
||||
"""
|
||||
Retrieve the issue/PR comments made by our members in the last 7 days.
|
||||
|
||||
Args:
|
||||
github_token (str): GitHub access token for API calls
|
||||
since (str): the `since` query parameter required by the GitHub RESTful API, in the format of YYYY-MM-DDTHH:MM:SSZ
|
||||
"""
|
||||
# prepare header
|
||||
headers = {
|
||||
'Authorization': f'Bearer {github_token}',
|
||||
|
@ -33,7 +56,7 @@ def generate_user_engagement_leaderboard_image(github_token, output_path):
|
|||
# do pagination to the API
|
||||
page = 1
|
||||
while True:
|
||||
comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={start_datetime_str}&page={page}'
|
||||
comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}'
|
||||
comment_response = requests.get(comment_api, headers=headers).json()
|
||||
|
||||
if len(comment_response) == 0:
|
||||
|
@ -61,67 +84,301 @@ def generate_user_engagement_leaderboard_image(github_token, output_path):
|
|||
else:
|
||||
user_engagement_count[member_name] = 1
|
||||
page += 1
|
||||
return user_engagement_count
|
||||
|
||||
# plot the leaderboard
|
||||
|
||||
def get_discussion_comments(github_token, since) -> Dict[str, int]:
    """
    Retrieve the discussion comments made by our members in the last 7 days.
    This is only available via the GitHub GraphQL API.

    Only discussions which were not created by a member and which were updated
    after ``since`` are inspected. Within them, every comment and reply written
    by a member and updated after ``since`` is counted.

    Args:
        github_token (str): GitHub access token for API calls
        since (Datetime): the query parameter to determine whether the comment is made this week,
            it is compared against datetimes parsed without timezone info, so it must be naive as well

    Returns:
        Dict[str, int]: a mapping from the member login to the number of comments/replies
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    num_per_request = 10

    # use graphql to get the discussions updated in the last 7 days
    def _generate_discussion_query(num, cursor: str = None):
        offset_str = "" if cursor is None else f', after: "{cursor}"'
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussions(first: {num} {offset_str}){{
                    edges {{
                        cursor
                        node{{
                            title
                            author{{
                                login
                            }}
                            number
                            authorAssociation
                            updatedAt
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    def _generate_comment_reply_count_for_discussion(discussion_number, num, cursor: str = None):
        # here we assume that each comment will not have more than 100 replies for simplicity
        # otherwise, we have to go through pagination for both comment and reply
        offset_str = "" if cursor is None else f', before: "{cursor}"'
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussion(number: {discussion_number}){{
                    title
                    comments(last: {num} {offset_str}){{
                        edges{{
                            cursor
                            node {{
                                author{{
                                    login
                                }}
                                updatedAt
                                authorAssociation
                                replies (last: 100) {{
                                    edges {{
                                        node {{
                                            author {{
                                                login
                                            }}
                                            updatedAt
                                            authorAssociation
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    # a utility function to make call to Github GraphQL API
    def _call_graphql_api(query):
        headers = {"Authorization": f"Bearer {github_token}"}
        json_data = {'query': query}
        response = requests.post('https://api.github.com/graphql', json=json_data, headers=headers)
        return response.json()

    def _updated_after_since(node) -> bool:
        return datetime.strptime(node['updatedAt'], timestamp_format) > since

    # count a comment/reply node if it was written by a member within the time range
    def _count_member_activity(node, counter):
        if node['authorAssociation'] != 'MEMBER':
            return
        if not _updated_after_since(node):
            return
        author = node['author']
        if author is None:
            # NOTE(review): the author is presumably null when the account was deleted,
            # there is no login to credit in that case - confirm against the GraphQL schema
            return
        member_name = author['login']
        counter[member_name] = counter.get(member_name, 0) + 1

    # get the discussion numbers updated in the last 7 days
    discussion_numbers = []
    cursor = None
    while True:
        data = _call_graphql_api(_generate_discussion_query(num_per_request, cursor))
        edges = data['data']['repository']['discussions']['edges']
        if not edges:
            break

        found_discussion_out_of_time_range = False
        for edge in edges:
            discussion = edge['node']

            # check if the updatedAt is within the last 7 days
            # if yes, keep the discussion whose author is not a member
            if _updated_after_since(discussion):
                if discussion['authorAssociation'] != 'MEMBER':
                    discussion_numbers.append(discussion['number'])
            else:
                found_discussion_out_of_time_range = True

        # NOTE(review): stopping at the first page which contains an old discussion assumes
        # that the API lists the most recently updated discussions first - confirm
        if found_discussion_out_of_time_range:
            break

        # forward pagination (first/after) continues from the last edge of the page
        cursor = edges[-1]['cursor']

    # get the discussion comments and replies made by our member
    user_engagement_count = {}
    for discussion_number in discussion_numbers:
        cursor = None

        while True:
            query = _generate_comment_reply_count_for_discussion(discussion_number, num_per_request, cursor)
            data = _call_graphql_api(query)

            # get the comments
            edges = data['data']['repository']['discussion']['comments']['edges']
            if not edges:
                break

            # backward pagination (last/before) must continue from the FIRST edge of the page:
            # the edges of a page stay in connection order, so edges[-1] is the newest comment
            # and using it as `before` would fetch an overlapping page and count comments repeatedly
            cursor = edges[0]['cursor']

            for edge in edges:
                comment = edge['node']
                _count_member_activity(comment, user_engagement_count)

                # the replies are counted no matter who wrote the parent comment
                for reply_edge in comment['replies']['edges']:
                    _count_member_activity(reply_edge['node'], user_engagement_count)

    return user_engagement_count
|
||||
|
||||
|
||||
def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
    """
    Generate the user engagement leaderboard image for stats within the last 7 days

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if the image is generated, False if no engagement is found and no image is saved
    """
    # request to the Github API to get the users who have replied the most in the last 7 days
    now = datetime.utcnow()
    start_datetime = now - timedelta(days=7)
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")

    # get the issue/PR comments and discussion comment count
    issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str)
    discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime)

    # merge the two counts, a member can show up in both
    total_engagement_count = dict(issue_pr_engagement_count)
    for name, count in discussion_engagement_count.items():
        total_engagement_count[name] = total_engagement_count.get(name, 0) + count

    if not total_engagement_count:
        return False

    # prepare the data for plotting, most active member first
    # so that the chart reads like the contributor leaderboard
    ranking = sorted(total_engagement_count.items(), key=lambda item: item[1], reverse=True)
    x = [count for _, count in ranking]
    y = [name for name, _ in ranking]

    # use Shanghai time to display on the image
    # the label must show the start of the 7-day window (not the current time), and it carries
    # the real UTC offset because a literal "Z" suffix would wrongly mark the Shanghai time as UTC
    display_start_datetime = pytz.utc.localize(start_datetime).astimezone(pytz.timezone('Asia/Shanghai'))
    display_start_datetime_str = display_start_datetime.strftime("%Y-%m-%dT%H:%M:%S%z")

    # plot the leaderboard
    xlabel = f"Number of Comments made (since {display_start_datetime_str})"
    ylabel = "Member"
    title = 'Active User Engagement Leaderboard'
    plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
    return True
|
||||
|
||||
|
||||
def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
    """
    Generate the contributor leaderboard image for stats within the last 7 days

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if the image is generated, False if nobody committed this week and no image is saved

    Raises:
        RuntimeError: if the API returns an error payload or keeps returning an empty response
    """
    import time

    # request to the Github API to get the users who have contributed in the last 7 days
    URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors'
    headers = {
        'Authorization': f'Bearer {github_token}',
        'Accept': 'application/vnd.github+json',
        'X-GitHub-Api-Version': '2022-11-28'
    }

    # sometimes the Github API returns empty response, request again if the response is empty
    # the retry is bounded and paced so that the workflow neither spins forever nor floods the API
    max_attempts = 10
    retry_interval_in_seconds = 5
    response = None
    for attempt in range(max_attempts):
        response = requests.get(URL, headers=headers).json()

        if isinstance(response, list) and len(response) != 0:
            break

        if isinstance(response, dict) and len(response) != 0:
            # a non-empty object is not contributor stats (e.g. an error message),
            # fail here with a readable message instead of crashing later on the wrong shape
            raise RuntimeError(f"Unexpected response from {URL}: {response.get('message', response)}")

        if attempt < max_attempts - 1:
            time.sleep(retry_interval_in_seconds)
    else:
        raise RuntimeError(f"{URL} returned an empty response for {max_attempts} consecutive requests")

    # get number of commits for each contributor
    contributor_list = [
        Contributor(name=item['author']['login'], num_commits_this_week=item['weeks'][-1]['c']) for item in response
    ]

    # the start of the latest week is read from the last entry, as unix timestamp
    start_timestamp = response[-1]['weeks'][-1]['w']

    # convert unix timestamp to Beijing datetime
    # the label carries the real UTC offset because a literal "Z" suffix would wrongly mark it as UTC
    start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai'))
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%S%z")

    # remove contributors who has zero commits
    contributor_list = [contributor for contributor in contributor_list if contributor.num_commits_this_week > 0]

    # sort by number of commits
    contributor_list.sort(key=lambda contributor: contributor.num_commits_this_week, reverse=True)

    if not contributor_list:
        return False

    # prepare the data for plotting
    x = [contributor.num_commits_this_week for contributor in contributor_list]
    y = [contributor.name for contributor in contributor_list]

    # plot
    xlabel = f"Number of Commits (since {start_datetime_str})"
    ylabel = "Contributor"
    title = 'Active Contributor Leaderboard'
    plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
    return True
|
||||
|
||||
|
||||
def upload_image_to_lark(lark_tenant_token, image_path):
|
||||
def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str:
|
||||
"""
|
||||
Upload image to Lark and return the image key
|
||||
|
||||
Args:
|
||||
lark_tenant_token (str): Lark tenant access token
|
||||
image_path (str): the path to the image to be uploaded
|
||||
"""
|
||||
url = "https://open.feishu.cn/open-apis/im/v1/images"
|
||||
form = {'image_type': 'message', 'image': (open(image_path, 'rb'))} # 需要替换具体的path
|
||||
multi_form = MultipartEncoder(form)
|
||||
|
@ -133,19 +390,40 @@ def upload_image_to_lark(lark_tenant_token, image_path):
|
|||
return response['data']['image_key']
|
||||
|
||||
|
||||
def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
    """
    Generate Lark tenant access token.

    Args:
        app_id (str): Lark app id
        app_secret (str): Lark app secret

    Returns:
        str: the value of the `tenant_access_token` field in the response
    """
    url = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal'
    data = {'app_id': app_id, 'app_secret': app_secret}
    response = requests.post(url, json=data).json()
    return response['tenant_access_token']
|
||||
|
||||
|
||||
def send_image_to_lark(image_key: str, webhook_url: str) -> None:
    """
    Send image to Lark.

    Args:
        image_key (str): the image key returned by Lark
        webhook_url (str): the webhook url to send the image
    """
    data = {"msg_type": "image", "content": {"image_key": image_key}}
    requests.post(webhook_url, json=data)
|
||||
|
||||
|
||||
def send_message_to_lark(message: str, webhook_url: str) -> None:
    """
    Send message to Lark.

    Args:
        message (str): the message to be sent
        webhook_url (str): the webhook url to send the message
    """
    data = {"msg_type": "text", "content": {"text": message}}
    requests.post(webhook_url, json=data)
|
||||
|
||||
|
@ -156,8 +434,8 @@ if __name__ == '__main__':
|
|||
USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'
|
||||
|
||||
# generate images
|
||||
# generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
|
||||
generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
|
||||
contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
|
||||
engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
|
||||
|
||||
# upload images
|
||||
APP_ID = os.environ['LARK_APP_ID']
|
||||
|
@ -166,11 +444,27 @@ if __name__ == '__main__':
|
|||
contributor_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, CONTRIBUTOR_IMAGE_PATH)
|
||||
user_engagement_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
|
||||
|
||||
# send contributor image to lark
|
||||
# send message to lark
|
||||
LARK_WEBHOOK_URL = os.environ['LARK_WEBHOOK_URL']
|
||||
send_message_to_lark("本周的开发者贡献榜单出炉啦!", LARK_WEBHOOK_URL)
|
||||
send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
|
||||
message = """本周的社区榜单出炉啦!
|
||||
1. 开发贡献者榜单
|
||||
2. 用户互动榜单
|
||||
|
||||
注:
|
||||
- 开发贡献者测评标准为:本周由公司成员提交的commit次数
|
||||
- 用户互动榜单测评标准为:本周由公司成员在非成员创建的issue/PR/discussion中回复的次数
|
||||
"""
|
||||
|
||||
send_message_to_lark(message, LARK_WEBHOOK_URL)
|
||||
|
||||
# send contributor image to lark
|
||||
if contrib_success:
|
||||
send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
|
||||
else:
|
||||
send_message_to_lark("本周没有成员贡献commit,无榜单图片生成。", LARK_WEBHOOK_URL)
|
||||
|
||||
# send user engagement image to lark
|
||||
send_message_to_lark("本周的开源社区互动榜单出炉啦!", LARK_WEBHOOK_URL)
|
||||
send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL)
|
||||
if engagement_success:
|
||||
send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL)
|
||||
else:
|
||||
send_message_to_lark("本周没有成员互动,无榜单图片生成。", LARK_WEBHOOK_URL)
|
||||
|
|
Loading…
Reference in New Issue