[workflow] added discussion stats to community report (#2572)

* [workflow] added discussion stats to community report

* polish code
pull/2585/head
Frank Lee 2023-02-06 13:47:59 +08:00 committed by GitHub
parent b0c29d1b4c
commit 8af5a0799b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 340 additions and 46 deletions

View File

@ -25,5 +25,5 @@ jobs:
env:
LARK_APP_ID: ${{ secrets.LARK_LEADERBOARD_APP_ID }}
LARK_APP_SECRET: ${{ secrets.LARK_LEADERBOARD_APP_SECRET }}
LARK_WEBHOOK_URL: ${{ secrets.LARK_LEADERBOARD_WEBHOOK_URL }}
LARK_WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
GITHUB_TOKEN: ${{ github.token }}

View File

@ -1,6 +1,7 @@
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List
import matplotlib.pyplot as plt
import pytz
@ -11,16 +12,38 @@ from requests_toolbelt import MultipartEncoder
@dataclass
class Contributor:
    """
    Record describing one GitHub contributor on the weekly leaderboard.

    Args:
        name (str): name of the contributor
        num_commits_this_week (int): number of commits made within one week
    """
    name: str
    num_commits_this_week: int
def generate_user_engagement_leaderboard_image(github_token, output_path):
# request to the Github API to get the users who have replied the most in the last 7 days
now = datetime.utcnow()
start_datetime = now - timedelta(days=7)
start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
    """
    Draw a bar chart with seaborn and save it as an image file.

    Args:
        x (List[Any]): values passed to ``seaborn.barplot`` as ``x``
        y (List[Any]): values passed to ``seaborn.barplot`` as ``y``
        xlabel (str): label of the x axis
        ylabel (str): label of the y axis
        title (str): title of the chart
        output_path (str): the path to save the image
    """
    # clear the current matplotlib figure before drawing a new chart
    plt.clf()

    # NOTE(review): the return value is discarded here; presumably this was
    # meant to select the default palette -- confirm it has any effect
    seaborn.color_palette()

    axes = seaborn.barplot(x=x, y=y)
    axes.set(xlabel=xlabel, ylabel=ylabel, title=title)
    seaborn.despine()

    plt.tight_layout()
    plt.savefig(output_path, dpi=1200)
def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
"""
Retrieve the issue/PR comments made by our members in the last 7 days.
Args:
github_token (str): GitHub access token for API calls
since (str): the path parameter required by GitHub Restful APIs, in the format of YYYY-MM-DDTHH:MM:SSZ
"""
# prepare header
headers = {
'Authorization': f'Bearer {github_token}',
@ -33,7 +56,7 @@ def generate_user_engagement_leaderboard_image(github_token, output_path):
# do pagination to the API
page = 1
while True:
comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={start_datetime_str}&page={page}'
comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}'
comment_response = requests.get(comment_api, headers=headers).json()
if len(comment_response) == 0:
@ -61,67 +84,301 @@ def generate_user_engagement_leaderboard_image(github_token, output_path):
else:
user_engagement_count[member_name] = 1
page += 1
return user_engagement_count
# plot the leaderboard
def get_discussion_comments(github_token, since) -> Dict[str, int]:
    """
    Retrieve the discussion comments made by our members in the last 7 days.
    This is only available via the GitHub GraphQL API.

    Only discussions that were NOT opened by a member are inspected. Within
    those discussions, every comment and every reply written by a member and
    updated after ``since`` counts as one engagement.

    Args:
        github_token (str): GitHub access token for API calls
        since (datetime): naive datetime; a comment counts only if its
            ``updatedAt`` (parsed as a naive datetime) is later than this

    Returns:
        Dict[str, int]: mapping from member login to the number of discussion
            comments/replies counted for that member
    """
    time_format = "%Y-%m-%dT%H:%M:%SZ"

    # use graphql to get the discussions updated in the last 7 days
    def _generate_discussion_query(num, cursor: str = None):
        # forward pagination: ask for the page that follows ``cursor``
        offset_str = "" if cursor is None else f", after: \"{cursor}\""
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussions(first: {num} {offset_str}){{
                    edges {{
                        cursor
                        node{{
                            title
                            author{{
                                login
                            }}
                            number
                            authorAssociation
                            updatedAt
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    def _generate_comment_reply_count_for_discussion(discussion_number, num, cursor: str = None):
        # here we assume that each comment will not have more than 100 replies for simplicity
        # otherwise, we have to go through pagination for both comment and reply
        # backward pagination: ask for the page that precedes ``cursor``
        offset_str = "" if cursor is None else f", before: \"{cursor}\""
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussion(number: {discussion_number}){{
                    title
                    comments(last: {num} {offset_str}){{
                        edges{{
                            cursor
                            node {{
                                author{{
                                    login
                                }}
                                updatedAt
                                authorAssociation
                                replies (last: 100) {{
                                    edges {{
                                        node {{
                                            author {{
                                                login
                                            }}
                                            updatedAt
                                            authorAssociation
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    # a utility function to make call to Github GraphQL API
    def _call_graphql_api(query):
        headers = {"Authorization": f"Bearer {github_token}"}
        json_data = {'query': query}
        response = requests.post('https://api.github.com/graphql', json=json_data, headers=headers)
        data = response.json()
        return data

    user_engagement_count = {}

    # shared by comments and replies: count the node if it was written by a
    # member and updated within the time window
    def _record_engagement(node):
        if node['authorAssociation'] != 'MEMBER':
            return
        updated_at = datetime.strptime(node['updatedAt'], time_format)
        if updated_at <= since:
            return
        author = node['author']
        if author is None:
            # the API reports a null author when the account no longer exists;
            # there is no login to attribute the comment to
            return
        member_name = author['login']
        user_engagement_count[member_name] = user_engagement_count.get(member_name, 0) + 1

    # get the discussion numbers updated in the last 7 days
    discussion_numbers = []
    num_per_request = 10
    cursor = None

    while True:
        query = _generate_discussion_query(num_per_request, cursor)
        data = _call_graphql_api(query)
        edges = data['data']['repository']['discussions']['edges']
        if not edges:
            break

        found_discussion_out_of_time_range = False
        for edge in edges:
            discussion = edge['node']
            discussion_updated_at = datetime.strptime(discussion['updatedAt'], time_format)

            # check if the updatedAt is within the last 7 days
            # if yes, add it to discussion_numbers
            if discussion_updated_at > since:
                # keep the discussion whose author is not a member
                if discussion['authorAssociation'] != 'MEMBER':
                    discussion_numbers.append(discussion['number'])
            else:
                found_discussion_out_of_time_range = True

        # NOTE(review): stopping at the first stale discussion assumes the API
        # returns discussions most-recently-updated first; the query passes no
        # explicit orderBy -- confirm against the API default
        if found_discussion_out_of_time_range:
            break

        # update cursor
        cursor = edges[-1]['cursor']

    # get the discussion comments and replies made by our member
    for discussion_number in discussion_numbers:
        cursor = None
        num_per_request = 10

        while True:
            query = _generate_comment_reply_count_for_discussion(discussion_number, num_per_request, cursor)
            data = _call_graphql_api(query)

            # get the comments
            edges = data['data']['repository']['discussion']['comments']['edges']
            if not edges:
                break

            # ``last``/``before`` walks the connection backwards while each page
            # keeps its forward order, so the next page must end before the
            # FIRST edge of this page; using the last edge would re-fetch (and
            # re-count) almost the whole page on every request
            cursor = edges[0]['cursor']

            for edge in edges:
                comment = edge['node']
                _record_engagement(comment)

                # replies are counted regardless of who wrote the parent comment
                for reply_edge in comment['replies']['edges']:
                    _record_engagement(reply_edge['node'])

    return user_engagement_count
def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
    """
    Generate the user engagement leaderboard image for stats within the last 7 days

    The issue/PR comment counts and the discussion comment counts are summed
    per member and plotted with the most active member first.

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if an image was written to ``output_path``, False if no
            member engagement was found (no image is generated in that case)
    """
    # request to the Github API to get the users who have replied the most in the last 7 days
    now = datetime.utcnow()
    start_datetime = now - timedelta(days=7)
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")

    # get the issue/PR comments and discussion comment count
    # the REST helper takes the timestamp as a string, the GraphQL helper
    # compares against the (naive, UTC) datetime object
    issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str)
    discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime)

    # update the total engagement count
    total_engagement_count = dict(issue_pr_engagement_count)
    for name, count in discussion_engagement_count.items():
        total_engagement_count[name] = total_engagement_count.get(name, 0) + count

    if not total_engagement_count:
        return False

    # prepare the data for plotting, most active member first
    ranking = sorted(total_engagement_count.items(), key=lambda item: item[1], reverse=True)
    x = [count for _, count in ranking]
    y = [name for name, _ in ranking]

    # use Shanghai time to display on the image; show the START of the window
    # (not the current time) and its real UTC offset instead of a literal "Z"
    display_start = pytz.utc.localize(start_datetime).astimezone(pytz.timezone('Asia/Shanghai'))
    display_start_str = display_start.strftime("%Y-%m-%dT%H:%M:%S%z")

    # plot the leaderboard
    xlabel = f"Number of Comments made (since {display_start_str})"
    ylabel = "Member"
    title = 'Active User Engagement Leaderboard'
    plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
    return True
def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
    """
    Generate the contributor leaderboard image for stats within the last 7 days

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if an image was written to ``output_path``, False if no
            contributor has commits in the latest week (no image is generated)

    Raises:
        RuntimeError: if the GitHub API keeps returning an empty response
    """
    import time

    # request to the Github API to get the users who have contributed in the last 7 days
    URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors'
    headers = {
        'Authorization': f'Bearer {github_token}',
        'Accept': 'application/vnd.github+json',
        'X-GitHub-Api-Version': '2022-11-28'
    }

    # sometimes the Github API returns empty response for unknown reason
    # (NOTE(review): presumably while the statistics are still being computed
    # on GitHub's side -- confirm); request again if the response is empty,
    # but pause between attempts and give up eventually instead of spinning
    max_attempts = 10
    retry_interval_seconds = 5
    response = None
    for attempt in range(max_attempts):
        if attempt > 0:
            time.sleep(retry_interval_seconds)
        response = requests.get(URL, headers=headers).json()
        if len(response) != 0:
            break
    else:
        raise RuntimeError(f'GitHub returned an empty contributor stats response {max_attempts} times in a row')

    # get number of commits for each contributor
    contributor_list = []
    start_timestamp = None
    for item in response:
        latest_week = item['weeks'][-1]
        contributor_list.append(Contributor(name=item['author']['login'], num_commits_this_week=latest_week['c']))

        # update start_timestamp
        start_timestamp = latest_week['w']

    # convert unix timestamp to Beijing datetime; print the real UTC offset
    # rather than a literal "Z", which would mislabel the time as UTC
    start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai'))
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%S%z")

    # remove contributors who have zero commits and sort by number of commits
    contributor_list = [c for c in contributor_list if c.num_commits_this_week > 0]
    contributor_list.sort(key=lambda c: c.num_commits_this_week, reverse=True)

    # prepare the data for plotting
    x = [c.num_commits_this_week for c in contributor_list]
    y = [c.name for c in contributor_list]

    if not x:
        return False

    # plot
    xlabel = f"Number of Commits (since {start_datetime_str})"
    ylabel = "Contributor"
    title = 'Active Contributor Leaderboard'
    plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
    return True
def upload_image_to_lark(lark_tenant_token, image_path):
def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str:
"""
Upload image to Lark and return the image key
Args:
lark_tenant_token (str): Lark tenant access token
image_path (str): the path to the image to be uploaded
"""
url = "https://open.feishu.cn/open-apis/im/v1/images"
form = {'image_type': 'message', 'image': (open(image_path, 'rb'))} # 需要替换具体的path
multi_form = MultipartEncoder(form)
@ -133,19 +390,40 @@ def upload_image_to_lark(lark_tenant_token, image_path):
return response['data']['image_key']
def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
    """
    Generate Lark tenant access token.

    Args:
        app_id (str): Lark app id
        app_secret (str): Lark app secret

    Returns:
        str: the ``tenant_access_token`` field of the JSON response
    """
    url = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal'
    data = {'app_id': app_id, 'app_secret': app_secret}
    response = requests.post(url, json=data).json()
    return response['tenant_access_token']
def send_image_to_lark(image_key: str, webhook_url: str) -> None:
    """
    Send image to Lark.

    The response of the webhook call is not inspected.

    Args:
        image_key (str): the image key returned by Lark
        webhook_url (str): the webhook url to send the image
    """
    data = {"msg_type": "image", "content": {"image_key": image_key}}
    requests.post(webhook_url, json=data)
def send_message_to_lark(message: str, webhook_url: str) -> None:
    """
    Send message to Lark.

    The response of the webhook call is not inspected.

    Args:
        message (str): the message to be sent
        webhook_url (str): the webhook url to send the message
    """
    data = {"msg_type": "text", "content": {"text": message}}
    requests.post(webhook_url, json=data)
@ -156,8 +434,8 @@ if __name__ == '__main__':
USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'
# generate images
# generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
# upload images
APP_ID = os.environ['LARK_APP_ID']
@ -166,11 +444,27 @@ if __name__ == '__main__':
contributor_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, CONTRIBUTOR_IMAGE_PATH)
user_engagement_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
# send contributor image to lark
# send message to lark
LARK_WEBHOOK_URL = os.environ['LARK_WEBHOOK_URL']
send_message_to_lark("本周的开发者贡献榜单出炉啦!", LARK_WEBHOOK_URL)
send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
message = """本周的社区榜单出炉啦!
1. 开发贡献者榜单
2. 用户互动榜单
- 开发贡献者测评标准为本周由公司成员提交的commit次数
- 用户互动榜单测评标准为本周由公司成员在非成员创建的issue/PR/discussion中回复的次数
"""
send_message_to_lark(message, LARK_WEBHOOK_URL)
# send contributor image to lark
if contrib_success:
send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
else:
send_message_to_lark("本周没有成员贡献commit无榜单图片生成。", LARK_WEBHOOK_URL)
# send user engagement image to lark
send_message_to_lark("本周的开源社区互动榜单出炉啦!", LARK_WEBHOOK_URL)
send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL)
if engagement_success:
send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL)
else:
send_message_to_lark("本周没有成员互动,无榜单图片生成。", LARK_WEBHOOK_URL)