diff --git a/.github/workflows/report_leaderboard_to_lark.yml b/.github/workflows/report_leaderboard_to_lark.yml index 60c3ad2a6..00d8e9e1f 100644 --- a/.github/workflows/report_leaderboard_to_lark.yml +++ b/.github/workflows/report_leaderboard_to_lark.yml @@ -25,5 +25,5 @@ jobs: env: LARK_APP_ID: ${{ secrets.LARK_LEADERBOARD_APP_ID }} LARK_APP_SECRET: ${{ secrets.LARK_LEADERBOARD_APP_SECRET }} - LARK_WEBHOOK_URL: ${{ secrets.LARK_LEADERBOARD_WEBHOOK_URL }} + LARK_WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }} GITHUB_TOKEN: ${{ github.token }} diff --git a/.github/workflows/scripts/generate_leaderboard_and_send_to_lark.py b/.github/workflows/scripts/generate_leaderboard_and_send_to_lark.py index 3dee16103..36cdd9518 100644 --- a/.github/workflows/scripts/generate_leaderboard_and_send_to_lark.py +++ b/.github/workflows/scripts/generate_leaderboard_and_send_to_lark.py @@ -1,6 +1,7 @@ import os from dataclasses import dataclass from datetime import datetime, timedelta +from typing import Any, Dict, List import matplotlib.pyplot as plt import pytz @@ -11,16 +12,38 @@ from requests_toolbelt import MultipartEncoder @dataclass class Contributor: + """ + Dataclass for a github contributor. + + Args: + name (str): name of the contributor + num_commits_this_week (int): number of commits made within one week + """ name: str num_commits_this_week: int -def generate_user_engagement_leaderboard_image(github_token, output_path): - # request to the Github API to get the users who have replied the most in the last 7 days - now = datetime.utcnow() - start_datetime = now - timedelta(days=7) - start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ") +def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None: + """ + This function is a utility to plot the bar charts. + """ + plt.clf() + seaborn.color_palette() + fig = seaborn.barplot(x=x, y=y) + fig.set(xlabel=xlabel, ylabel=ylabel, title=title) + seaborn.despine() + plt.tight_layout() + plt.savefig(output_path, dpi=1200) + +def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]: + """ + Retrive the issue/PR comments made by our members in the last 7 days. + + Args: + github_token (str): GitHub access token for API calls + since (str): the path parameter required by GitHub Restful APIs, in the format of YYYY-MM-DDTHH:MM:SSZ + """ # prepare header headers = { 'Authorization': f'Bearer {github_token}', @@ -33,7 +56,7 @@ def generate_user_engagement_leaderboard_image(github_token, output_path): # do pagination to the API page = 1 while True: - comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={start_datetime_str}&page={page}' + comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}' comment_response = requests.get(comment_api, headers=headers).json() if len(comment_response) == 0: @@ -61,67 +84,301 @@ def generate_user_engagement_leaderboard_image(github_token, output_path): else: user_engagement_count[member_name] = 1 page += 1 + return user_engagement_count - # plot the leaderboard + +def get_discussion_comments(github_token, since) -> Dict[str, int]: + """ + Retrive the discussion comments made by our members in the last 7 days. + This is only available via the GitHub GraphQL API. + + Args: + github_token (str): GitHub access token for API calls + since (Datetime): the query parameter to determine whether the comment is made this week + """ + + # use graphql to get the discussions updated in the last 7 days + def _generate_discussion_query(num, cursor: str = None): + if cursor is None: + offset_str = "" + else: + offset_str = f", after: \"{cursor}\"" + query = f""" + {{ + repository(owner: "hpcaitech", name: "ColossalAI"){{ + discussions(first: {num} {offset_str}){{ + edges {{ + cursor + node{{ + title + author{{ + login + }} + number + authorAssociation + updatedAt + }} + }} + }} + }} + }} + """ + return query + + def _generate_comment_reply_count_for_discussion(discussion_number, num, cursor: str = None): + # here we assume that each comment will not have more than 100 replies for simplicity + # otherwise, we have to go through pagination for both comment and reply + if cursor is None: + offset_str = "" + else: + offset_str = f", before: \"{cursor}\"" + query = f""" + {{ + repository(owner: "hpcaitech", name: "ColossalAI"){{ + discussion(number: {discussion_number}){{ + title + comments(last: {num} {offset_str}){{ + edges{{ + cursor + node {{ + author{{ + login + }} + updatedAt + authorAssociation + replies (last: 100) {{ + edges {{ + node {{ + author {{ + login + }} + updatedAt + authorAssociation + }} + }} + }} + }} + }} + }} + }} + }} + }} + """ + return query + + # a utility function to make call to Github GraphQL API + def _call_graphql_api(query): + headers = {"Authorization": f"Bearer {github_token}"} + json_data = {'query': query} + response = requests.post('https://api.github.com/graphql', json=json_data, headers=headers) + data = response.json() + return data + + # get the discussion numbers updated in the last 7 days + discussion_numbers = [] + num_per_request = 10 + cursor = None + while True: + query = _generate_discussion_query(num_per_request, cursor) + data = _call_graphql_api(query) + found_discussion_out_of_time_range = False + + edges = data['data']['repository']['discussions']['edges'] + if len(edges) == 0: + break + else: + # keep the discussion whose author is not a member + for edge in edges: + # print the discussion title + discussion = edge['node'] + + discussion_updated_at = datetime.strptime(discussion['updatedAt'], "%Y-%m-%dT%H:%M:%SZ") + # check if the updatedAt is within the last 7 days + # if yes, add it to dicussion_numbers + if discussion_updated_at > since: + if discussion['authorAssociation'] != 'MEMBER': + discussion_numbers.append(discussion['number']) + else: + found_discussion_out_of_time_range = True + + if found_discussion_out_of_time_range: + break + else: + # update cursor + cursor = edges[-1]['cursor'] + + # get the dicussion comments and replies made by our member + user_engagement_count = {} + for dicussion_number in discussion_numbers: + cursor = None + num_per_request = 10 + + while True: + query = _generate_comment_reply_count_for_discussion(dicussion_number, num_per_request, cursor) + data = _call_graphql_api(query) + + # get the comments + edges = data['data']['repository']['discussion']['comments']['edges'] + + # update the cursor + if len(edges) == 0: + break + else: + # update cursor for pagination + cursor = edges[-1]['cursor'] + + for edge in edges: + comment = edge['node'] + if comment['authorAssociation'] == 'MEMBER': + # check if the updatedAt is within the last 7 days + # if yes, add it to user_engagement_count + comment_updated_at = datetime.strptime(comment['updatedAt'], "%Y-%m-%dT%H:%M:%SZ") + if comment_updated_at > since: + member_name = comment['author']['login'] + if member_name in user_engagement_count: + user_engagement_count[member_name] += 1 + else: + user_engagement_count[member_name] = 1 + + # get the replies + reply_edges = comment['replies']['edges'] + if len(reply_edges) == 0: + continue + else: + for reply_edge in reply_edges: + reply = reply_edge['node'] + if reply['authorAssociation'] == 'MEMBER': + # check if the updatedAt is within the last 7 days + # if yes, add it to dicussion_numbers + reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ") + if reply_updated_at > since: + member_name = reply['author']['login'] + if member_name in user_engagement_count: + user_engagement_count[member_name] += 1 + else: + user_engagement_count[member_name] = 1 + return user_engagement_count + + +def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool: + """ + Generate the user engagement leaderboard image for stats within the last 7 days + + Args: + github_token (str): GitHub access token for API calls + output_path (str): the path to save the image + """ + + # request to the Github API to get the users who have replied the most in the last 7 days + now = datetime.utcnow() + start_datetime = now - timedelta(days=7) + start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ") + + # get the issue/PR comments and discussion comment count + issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str) + discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime) + total_engagement_count = {} + + # update the total engagement count + total_engagement_count.update(issue_pr_engagement_count) + for name, count in discussion_engagement_count.items(): + if name in total_engagement_count: + total_engagement_count[name] += count + else: + total_engagement_count[name] = count + + # prepare the data for plotting x = [] y = [] - for name, count in user_engagement_count.items(): - x.append(count) - y.append(name) - xticks = [str(v) for v in range(1, max(x) + 1)] - seaborn.color_palette() - fig = seaborn.barplot(x=x, y=y) - fig.set(xlabel=f"Number of Comments made (since {start_datetime})", - ylabel="Member", - title='Active User Engagement Leaderboard') - seaborn.despine() - plt.tight_layout() - plt.savefig(output_path, dpi=1200) + if len(total_engagement_count) > 0: + for name, count in total_engagement_count.items(): + x.append(count) + y.append(name) + + # use Shanghai time to display on the image + start_datetime_str = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%dT%H:%M:%SZ") + + # plot the leaderboard + xlabel = f"Number of Comments made (since {start_datetime_str})" + ylabel = "Member" + title = 'Active User Engagement Leaderboard' + plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path) + return True + else: + return False -def generate_contributor_leaderboard_image(github_token, output_path): +def generate_contributor_leaderboard_image(github_token, output_path) -> bool: + """ + Generate the contributor leaderboard image for stats within the last 7 days + + Args: + github_token (str): GitHub access token for API calls + output_path (str): the path to save the image + """ + # request to the Github API to get the users who have contributed in the last 7 days URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors' headers = { 'Authorization': f'Bearer {github_token}', 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } - response = requests.get(URL, headers=headers).json() + + while True: + response = requests.get(URL, headers=headers).json() + + if len(response) != 0: + # sometimes the Github API returns empty response for unknown reason + # request again if the response is empty + break contributor_list = [] - # convert unix timestamp to Beijing datetime - start_timestamp = response[0]['weeks'][-1]['w'] - start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai')) - # get number of commits for each contributor + start_timestamp = None for item in response: num_commits_this_week = item['weeks'][-1]['c'] name = item['author']['login'] contributor = Contributor(name=name, num_commits_this_week=num_commits_this_week) contributor_list.append(contributor) + # update start_timestamp + start_timestamp = item['weeks'][-1]['w'] + + # convert unix timestamp to Beijing datetime + start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai')) + start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ") + # sort by number of commits contributor_list.sort(key=lambda x: x.num_commits_this_week, reverse=True) # remove contributors who has zero commits contributor_list = [x for x in contributor_list if x.num_commits_this_week > 0] - # plot - seaborn.color_palette() + # prepare the data for plotting x = [x.num_commits_this_week for x in contributor_list] y = [x.name for x in contributor_list] - fig = seaborn.barplot(x=x, y=y) - fig.set(xlabel=f"Number of Commits (since {start_datetime})", - ylabel="Contributor", - title='Active Contributor Leaderboard') - seaborn.despine() - plt.tight_layout() - plt.savefig(output_path, dpi=1200) + + # plot + if len(x) > 0: + xlabel = f"Number of Commits (since {start_datetime_str})" + ylabel = "Contributor" + title = 'Active Contributor Leaderboard' + plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path) + return True + else: + return False -def upload_image_to_lark(lark_tenant_token, image_path): +def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str: + """ + Upload image to Lark and return the image key + + Args: + lark_tenant_token (str): Lark tenant access token + image_path (str): the path to the image to be uploaded + """ url = "https://open.feishu.cn/open-apis/im/v1/images" form = {'image_type': 'message', 'image': (open(image_path, 'rb'))} # 需要替换具体的path multi_form = MultipartEncoder(form) @@ -133,19 +390,40 @@ def upload_image_to_lark(lark_tenant_token, image_path): return response['data']['image_key'] -def generate_lark_tenant_access_token(app_id, app_secret): +def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str: + """ + Generate Lark tenant access token. + + Args: + app_id (str): Lark app id + app_secret (str): Lark app secret + """ url = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal' data = {'app_id': app_id, 'app_secret': app_secret} response = requests.post(url, json=data).json() return response['tenant_access_token'] -def send_image_to_lark(image_key, webhook_url): +def send_image_to_lark(image_key: str, webhook_url: str) -> None: + """ + Send image to Lark. + + Args: + image_key (str): the image key returned by Lark + webhook_url (str): the webhook url to send the image + """ data = {"msg_type": "image", "content": {"image_key": image_key}} requests.post(webhook_url, json=data) -def send_message_to_lark(message, webhook_url): +def send_message_to_lark(message: str, webhook_url: str): + """ + Send message to Lark. + + Args: + message (str): the message to be sent + webhook_url (str): the webhook url to send the message + """ data = {"msg_type": "text", "content": {"text": message}} requests.post(webhook_url, json=data) @@ -156,8 +434,8 @@ if __name__ == '__main__': USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png' # generate images - # generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH) - generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH) + contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH) + engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH) # upload images APP_ID = os.environ['LARK_APP_ID'] @@ -166,11 +444,27 @@ if __name__ == '__main__': contributor_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, CONTRIBUTOR_IMAGE_PATH) user_engagement_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, USER_ENGAGEMENT_IMAGE_PATH) - # send contributor image to lark + # send message to lark LARK_WEBHOOK_URL = os.environ['LARK_WEBHOOK_URL'] - send_message_to_lark("本周的开发者贡献榜单出炉啦!", LARK_WEBHOOK_URL) - send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL) + message = """本周的社区榜单出炉啦! +1. 开发贡献者榜单 +2. 用户互动榜单 + +注: +- 开发贡献者测评标准为:本周由公司成员提交的commit次数 +- 用户互动榜单测评标准为:本周由公司成员在非成员创建的issue/PR/discussion中回复的次数 +""" + + send_message_to_lark(message, LARK_WEBHOOK_URL) + + # send contributor image to lark + if contrib_success: + send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL) + else: + send_message_to_lark("本周没有成员贡献commit,无榜单图片生成。", LARK_WEBHOOK_URL) # send user engagement image to lark - send_message_to_lark("本周的开源社区互动榜单出炉啦!", LARK_WEBHOOK_URL) - send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL) + if engagement_success: + send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL) + else: + send_message_to_lark("本周没有成员互动,无榜单图片生成。", LARK_WEBHOOK_URL)