一、问题
本地有一个文件夹openEuler-22.03-LTS(如下图所示),文件夹名是远程(Gitee平台)仓库的分支名,文件都是yaml文件,文件名对应仓库名,都是https://gitee.com/src-oepkgs/下的仓库,需要将每个文件依次添加到各自对应的仓库中,并提交PR到远程仓库的openEuler-22.03-LTS分支上。
二、思路1:走fork进行合并
先fork后clone,PR在相同的分支不同的用户(fork和被fork的用户)之间
Step1
由文件名解析出仓库名,与https://gitee.com/src-oepkgs/进行拼接,得到仓库的完整地址。下面以https://gitee.com/src-oepkgs/a2jmidid.git为例。
Step2
将这个仓库fork到自己的账号(自己也当成组织),此时这个仓库的归属权是我,这个仓库的地址变为https://gitee.com/oepkgs-support/a2jmidid
Step3
将fork后(归属权是我)的仓库https://gitee.com/oepkgs-support/a2jmidid.git克隆指定分支(openEuler-22.03-LTS分支)到本地(电脑)D:/workspace/自动化/openEuler-22.03-LTS文件夹下,D:/workspace/自动化/openEuler-22.03-LTS/a2jmidid
git clone -b openEuler-22.03-LTS https://gitee.com/oepkgs-support/a2jmidid.git
Step4
进入到这个仓库文件夹,会自动切换到本地的openEuler-22.03-LTS分支
Step5
取回远程主机openEuler-22.03-LTS分支的更新,再与本地的openEuler-22.03-LTS分支合并
git pull origin openEuler-22.03-LTS:openEuler-22.03-LTS
Step6
将openEuler-22.03-LTS文件夹下的a2jmidid.yaml文件复制到这个项目仓库(文件夹:D:/workspace/自动化/openEuler-22.03-LTS/a2jmidid)中
Step7
提交并推送到远程的openEuler-22.03-LTS分支
git add -A
git commit -m 'add files'
git push origin openEuler-22.03-LTS:openEuler-22.03-LTS
这里直接push到远程的openEuler-22.03-LTS分支是可以的,因为这个push是在本地oepkgs-support/a2jmidid和远程的oepkgs-support/a2jmidid这同一个用户之间。oepkgs-support(人)是这个仓库的所有者,有直接推送这个权限。
Step8
创建oepkgs-support:openEuler-22.03-LTS这个分支到src-oepkgs:openEuler-22.03-LTS分支的PR:https://gitee.com/src-oepkgs/a2jmidid/pulls/1
完整代码:
import os
import subprocess
import time
import requests
import shutil
import pandas as pd
def fork_repo(repo_name):
    """Fork ``src-oepkgs/<repo_name>`` via the Gitee v5 API and print the result.

    Sends a POST request to ``/api/v5/repos/src-oepkgs/<repo_name>/forks``
    authenticated with the access token below. A 201 response is reported as
    success; any other status is reported as failure together with the
    server's error message.

    Args:
        repo_name: Name of the repository under the ``src-oepkgs`` namespace.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection/SSL failure or timeout).
    """
    # Repository information
    owner = "src-oepkgs"  # namespace of the repository to fork
    # Authorization information
    access_token = 'xxxxxxxxxxx'  # placeholder: replace with your own access token
    # API URL
    url = f"https://gitee.com/api/v5/repos/{owner}/{repo_name}/forks"
    # Request headers
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {access_token}"
    }
    # Send the POST request that performs the fork. The timeout keeps one
    # stalled connection from hanging the whole batch run.
    response = requests.post(url, headers=headers, timeout=30)
    # Check the response status code
    if response.status_code == 201:
        print("Fork created successfully!")
        print("Forked repository URL:", response.json()["html_url"])
    else:
        print("Failed to create Fork.")
        try:
            message = response.json()["message"]
        except (ValueError, KeyError, TypeError):
            # The error body is not guaranteed to be JSON with a "message"
            # field (e.g. an HTML 5xx page); fall back to the raw body.
            message = response.text
        print("Error message:", message)
def delete_repository(repo_name):
    """Delete ``oepkgs-support/<repo_name>`` via the Gitee v5 API.

    Sends a DELETE request to ``/api/v5/repos/oepkgs-support/<repo_name>``
    and prints whether the server answered with 204.

    Args:
        repo_name: Name of the repository under the ``oepkgs-support`` namespace.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection/SSL failure or timeout).
    """
    owner = "oepkgs-support"  # namespace that owns the repository
    access_token = 'xxxxxxxxxxx'  # placeholder: replace with your access token
    url = f"https://gitee.com/api/v5/repos/{owner}/{repo_name}"
    headers = {
        "Authorization": f"Bearer {access_token}"
    }
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.delete(url, headers=headers, timeout=30)
    if response.status_code == 204:
        print("仓库删除成功")
    else:
        print("仓库删除失败")
def git_add_commit_push(repo_path, file_name, branch):
    """Stage ``file_name``, commit it and push ``branch`` to ``origin``.

    The process working directory is switched to ``repo_path`` and is left
    there after the call. The exit codes of the git commands are not checked.

    Args:
        repo_path: Path of the local clone to operate in.
        file_name: File (relative to ``repo_path``) to add and commit.
        branch: Branch name, pushed to the remote branch of the same name.
    """
    os.chdir(repo_path)
    git_steps = (
        ["git", "add", file_name],
        ["git", "commit", "-m", f"Add {file_name}"],
        ["git", "push", "origin", f"{branch}:{branch}"],
    )
    for step in git_steps:
        subprocess.run(step)
def create_pr(repo_name):
    """Open a pull request from the fork's branch to ``src-oepkgs/<repo_name>``.

    Sends a POST request to ``/api/v5/repos/src-oepkgs/<repo_name>/pulls``
    with head ``oepkgs-support:openEuler-22.03-LTS`` and base
    ``openEuler-22.03-LTS``. A 201 response is reported as success; any other
    status is reported as failure together with the server's error message.

    Args:
        repo_name: Name of the repository under the ``src-oepkgs`` namespace.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection/SSL failure or timeout).
    """
    # Repository information
    owner = "src-oepkgs"  # namespace that owns the target repository
    # Authorization information
    access_token = 'xxxxxxxxxxxxx'  # placeholder: replace with your own access token
    # Pull request parameters
    title = "add yaml file"
    head = "oepkgs-support:openEuler-22.03-LTS"  # source: <fork owner>:<branch>
    base = "openEuler-22.03-LTS"  # target branch
    # API URL
    url = f"https://gitee.com/api/v5/repos/{owner}/{repo_name}/pulls"
    # Request headers
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {access_token}"
    }
    # Request body
    data = {
        "title": title,
        "head": head,
        "base": base
    }
    # Send the POST request that creates the pull request. The timeout keeps
    # one stalled connection from hanging the whole batch run.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    # Check the response status code
    if response.status_code == 201:
        print("Pull Request created successfully!")
        print("Pull Request URL:", response.json()["html_url"])
    else:
        print("Failed to create Pull Request.")
        try:
            message = response.json()["message"]
        except (ValueError, KeyError, TypeError):
            # The error body is not guaranteed to be JSON with a "message"
            # field (e.g. an HTML 5xx page); fall back to the raw body.
            message = response.text
        print("Error message:", message)
def main():
    """Fork each repository, add its yaml file, push it and open a PR.

    Every file in ``<basedir>/openEuler-22.03-LTS`` whose name ends in
    ``.yaml`` and starts with a character between ``start_`` and ``end_`` is
    processed: the repository named after the file is forked, the fork is
    cloned, the yaml file is copied in, committed and pushed, a pull request
    is created and the local clone is removed again. ``basedir`` is the
    parent of the directory containing this script.

    Repositories that fail are collected in a list that is written to
    ``<basedir>/log`` as an Excel file after every 100 processed files and
    once more at the end for the remaining ones.
    """
    # Local folder path and remote branch name
    basedir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    local_folder = os.path.join(basedir, "openEuler-22.03-LTS")  # local folder path
    branch_name = "openEuler-22.03-LTS"
    index = 0
    abnormal_repo = []
    start_ = 'a'
    end_ = 'z'

    def save_abnormal_log():
        """Print the abnormal repositories collected so far and dump them to Excel."""
        print(abnormal_repo)
        log_dir = os.path.join(basedir, 'log')
        # to_excel does not create missing parent directories.
        os.makedirs(log_dir, exist_ok=True)
        df = pd.DataFrame(abnormal_repo, columns=['异常仓库'])
        df.to_excel(os.path.join(log_dir, f'{start_}-{end_}异常仓库{index}.xlsx'), index=False)

    # Walk over every file in the folder
    for file_name in os.listdir(local_folder):
        if not (file_name.lower().endswith('.yaml') and start_ <= file_name[0].lower() <= end_):
            continue
        # Derive the repository name by stripping the extension. splitext only
        # touches the suffix and also handles upper-case ".YAML", which the
        # case-insensitive filter above lets through.
        repo_name = os.path.splitext(file_name)[0]
        print(repo_name)
        index = index + 1
        try:
            # Fork the repository
            fork_repo(repo_name)
            fork_repo_url = f"https://gitee.com/oepkgs-support/{repo_name}.git"
            repo_path = os.path.join(local_folder, repo_name)
            if os.path.exists(repo_path):
                # A directory left over from an earlier run is not touched.
                print('skip existing directory:', repo_path)
            else:
                try:
                    subprocess.run(["git", "clone", "-b", branch_name, fork_repo_url, repo_path])
                except Exception:
                    abnormal_repo.append(repo_name)
                else:
                    # subprocess.run does not raise when git exits non-zero,
                    # so success is detected by the clone directory existing.
                    if os.path.exists(repo_path):
                        print('clone success')
                        # Pull first to fetch updates of the remote branch
                        os.chdir(repo_path)
                        subprocess.run(["git", "config", "--global", "http.sslVerify", "true"])
                        subprocess.run(["git", "pull", "origin", branch_name])
                        # Copy the yaml file into the repository directory
                        file_path = os.path.join(local_folder, file_name)
                        shutil.copy(file_path, repo_path)
                        # Run the git add/commit/push sequence
                        git_add_commit_push(repo_path, file_name, branch_name)
                        # Create the PR
                        create_pr(repo_name)
                        print(repo_path)
                        # Leave the clone before deleting it: Windows cannot
                        # remove a directory that is a process's current
                        # working directory.
                        os.chdir(local_folder)
                        try:
                            # "rmdir /s /q" is a cmd.exe builtin (Windows only),
                            # hence shell=True.
                            subprocess.run(["rmdir", "/s", "/q", repo_path], shell=True)
                        except Exception as e:
                            print("删除文件夹时出现错误:", str(e))
                    else:
                        abnormal_repo.append(repo_name)
        except Exception as e:
            print("fork出现错误:", str(e))
            abnormal_repo.append(repo_name)
            time.sleep(30)
            try:
                fork_repo(repo_name)
            except Exception as retry_error:
                # A second failure must not abort the whole batch; the
                # repository is already recorded as abnormal.
                print("fork出现错误:", str(retry_error))
        if index % 100 == 0:
            save_abnormal_log()
    # Persist the tail of the run that did not reach a multiple of 100.
    if index % 100 != 0:
        save_abnormal_log()
# Run the batch job only when this file is executed as a script.
if __name__ == "__main__":
    main()
问题1:fork不成功
fork出现错误: HTTPSConnectionPool(host='gitee.com', port=443): Max retries exceeded with url: /api/v5/repos/src-oepkgs/gtk-theme-adwaita-tweaks/forks (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))
解决方法:引入异常捕获,增加时延,记录异常的仓库
问题2:fork成功但是不一定能够克隆下来
git clone时fatal: unable to access 'https://gitee.com/XXX.git/': The requested URL returned error: 500
解决方法:引入异常捕获,记录异常的仓库
问题3:每个账号fork的项目数量有限制,最多1k,如果创建的PR没有合并,删除这个fork的仓库,这个PR也会被删除
换一种思路,走分支合并,不走fork合并。
三、思路2:走分支合并
不fork,直接clone原仓库,在远程源仓库创建新分支,走分支合并,PR合并在同一个用户的不同分支之间。
Step1
直接克隆原仓库到本地:git clone -b openEuler-22.03-LTS https://gitee.com/src-oepkgs/a2jmidid.git
step2
进入到这个仓库文件夹,自动会切换到本地的openEuler-22.03-LTS分支
step3
取回远程主机openEuler-22.03-LTS分支的更新,再与本地的openEuler-22.03-LTS分支合并
git pull origin openEuler-22.03-LTS:openEuler-22.03-LTS
step4
将openEuler-22.03-LTS文件夹下的a2jmidid.yaml文件复制到这个项目仓库中
step5
提交并推送到远程的openEuler-22.03-LTS-yaml分支(相当于在远程建了一个新的openEuler-22.03-LTS-yaml分支并push到这个分支)
git add -A
git commit -m 'add files'
git push origin openEuler-22.03-LTS:openEuler-22.03-LTS-yaml
这里直接push到远程的openEuler-22.03-LTS分支是会报错的:这个push是在本地src-oepkgs/a2jmidid和远程的src-oepkgs/a2jmidid这同一个仓库之间,而oepkgs-support(人)不是仓库a2jmidid的管理员(所有者或开发者),没有直接推送的权限(否则万一提交不当会直接污染这个远程分支)。因此需要在远程新建一个分支openEuler-22.03-LTS-yaml并push到这个新分支,然后创建openEuler-22.03-LTS-yaml分支到openEuler-22.03-LTS分支的PR进行合并。
step6
创建src-oepkgs:openEuler-22.03-LTS-yaml这个分支到src-oepkgs:openEuler-22.03-LTS分支的PR
完整代码:
import os
import subprocess
import time
import requests
import shutil
import pandas as pd
def git_add_commit_push(repo_path, file_name, branch):
    """Stage ``file_name``, commit it and push ``branch`` to ``<branch>-yaml``.

    The process working directory is switched to ``repo_path`` and is left
    there after the call. Before staging, the global git option
    ``core.autocrlf`` is set to ``true``. The exit codes of the git commands
    are not checked.

    Args:
        repo_path: Path of the local clone to operate in.
        file_name: File (relative to ``repo_path``) to add and commit.
        branch: Local branch name; it is pushed to the remote branch
            ``<branch>-yaml`` on ``origin``.
    """
    os.chdir(repo_path)
    git_steps = (
        ['git', 'config', '--global', 'core.autocrlf', 'true'],
        ["git", "add", file_name],
        ["git", "commit", "-m", f"Add {file_name}"],
        ["git", "push", "origin", f"{branch}:{branch}-yaml"],
    )
    for step in git_steps:
        subprocess.run(step)
def create_pr(repo_name):
    """Open a pull request between two branches of ``src-oepkgs/<repo_name>``.

    Sends a POST request to ``/api/v5/repos/src-oepkgs/<repo_name>/pulls``
    with head ``openEuler-22.03-LTS-yaml`` and base ``openEuler-22.03-LTS``.
    A 201 response is reported as success; any other status is reported as
    failure together with the server's error message.

    Args:
        repo_name: Name of the repository under the ``src-oepkgs`` namespace.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection/SSL failure or timeout).
    """
    # Repository information
    owner = "src-oepkgs"  # namespace that owns the repository
    # Authorization information
    access_token = '4e8xxxx250afc5a19'  # access token of your own account
    # Pull request parameters
    title = "add yaml file"
    head = "openEuler-22.03-LTS-yaml"  # source branch
    base = "openEuler-22.03-LTS"  # target branch
    # API URL
    url = f"https://gitee.com/api/v5/repos/{owner}/{repo_name}/pulls"
    # Request headers
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {access_token}"
    }
    # Request body
    data = {
        "title": title,
        "head": head,
        "base": base
    }
    # Send the POST request that creates the pull request. The timeout keeps
    # one stalled connection from hanging the whole batch run.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    # Check the response status code
    if response.status_code == 201:
        print("Pull Request created successfully!")
        print("Pull Request URL:", response.json()["html_url"])
    else:
        print("Failed to create Pull Request.")
        try:
            message = response.json()["message"]
        except (ValueError, KeyError, TypeError):
            # The error body is not guaranteed to be JSON with a "message"
            # field (e.g. an HTML 5xx page); fall back to the raw body.
            message = response.text
        print("Error message:", message)
def main():
    """Clone each repository, push its yaml file to a new branch and open a PR.

    Every file in ``<basedir>/openEuler-22.03-LTS`` whose name ends in
    ``.yaml`` and starts with a character between ``start_`` and ``end_`` is
    processed: the repository ``src-oepkgs/<name>`` is cloned, the yaml file
    is copied in, committed and pushed, a pull request is created and the
    local clone is removed again. ``basedir`` is the parent of the directory
    containing this script.

    Repositories that fail are collected in a list that is written to
    ``<basedir>/log`` as an Excel file after every 100 processed files and
    once more at the end for the remaining ones.
    """
    # Local folder path and remote branch name
    basedir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    local_folder = os.path.join(basedir, "openEuler-22.03-LTS")  # local folder path
    branch_name = "openEuler-22.03-LTS"
    index = 0
    abnormal_repo = []
    start_ = 'a'
    end_ = 'z'

    def save_abnormal_log():
        """Print the abnormal repositories collected so far and dump them to Excel."""
        print(abnormal_repo)
        log_dir = os.path.join(basedir, 'log')
        # to_excel does not create missing parent directories.
        os.makedirs(log_dir, exist_ok=True)
        df = pd.DataFrame(abnormal_repo, columns=['异常仓库'])
        df.to_excel(os.path.join(log_dir, f'{start_}-{end_}异常仓库{index}.xlsx'), index=False)

    # Walk over every file in the folder
    for file_name in os.listdir(local_folder):
        if not (file_name.lower().endswith('.yaml') and start_ <= file_name[0].lower() <= end_):
            continue
        # Derive the repository name by stripping the extension. splitext only
        # touches the suffix and also handles upper-case ".YAML", which the
        # case-insensitive filter above lets through.
        repo_name = os.path.splitext(file_name)[0]
        print(repo_name)
        index = index + 1
        # Clone the repository
        repo_url = f"https://gitee.com/src-oepkgs/{repo_name}.git"
        repo_path = os.path.join(local_folder, repo_name)
        if os.path.exists(repo_path):
            # A directory left over from an earlier run is not touched.
            print('skip existing directory:', repo_path)
        else:
            try:
                subprocess.run(["git", "clone", "-b", branch_name, repo_url, repo_path])
            except Exception:
                abnormal_repo.append(repo_name)
            else:
                # subprocess.run does not raise when git exits non-zero, so
                # success is detected by the clone directory existing.
                if os.path.exists(repo_path):
                    print('clone success')
                    # Pull first to fetch updates of the remote branch
                    os.chdir(repo_path)
                    subprocess.run(["git", "config", "--global", "http.sslVerify", "true"])
                    subprocess.run(["git", "pull", "origin", branch_name])
                    # Copy the yaml file into the repository directory
                    file_path = os.path.join(local_folder, file_name)
                    shutil.copy(file_path, repo_path)
                    # Run the git add/commit/push sequence
                    git_add_commit_push(repo_path, file_name, branch_name)
                    # Create the PR
                    try:
                        create_pr(repo_name)
                    except Exception as e:
                        print("create pr出现错误:", str(e))
                        abnormal_repo.append(repo_name)
                        time.sleep(30)
                        try:
                            create_pr(repo_name)
                        except Exception as retry_error:
                            # A second failure must not abort the whole batch;
                            # the repository is already recorded as abnormal.
                            print("create pr出现错误:", str(retry_error))
                    # Leave the clone before deleting it: Windows cannot remove
                    # a directory that is a process's current working directory.
                    os.chdir(local_folder)
                    # Delete the cloned repository folder
                    try:
                        # "rmdir /s /q" is a cmd.exe builtin (Windows only),
                        # hence shell=True.
                        subprocess.run(["rmdir", "/s", "/q", repo_path], shell=True)
                    except Exception as e:
                        print("删除文件夹时出现错误:", str(e))
                else:
                    abnormal_repo.append(repo_name)
        if index % 100 == 0:
            save_abnormal_log()
    # Persist the tail of the run that did not reach a multiple of 100.
    if index % 100 != 0:
        save_abnormal_log()
# Run the batch job only when this file is executed as a script.
if __name__ == "__main__":
    main()
本人读研期间发表6篇SCI数据挖掘相关论文,现在某研究院从事数据算法相关科研工作,对Python有一定认知和理解,会结合自身科研实践经历不定期分享关于Python、机器学习、深度学习、人工智能基础知识与应用案例。
致力于只做原创,以最简单的方式理解和学习,关注我一起交流成长。
1、关注“数据杂坛”公众号,即可在后台联系我获取相关数据集和源码。
2、关注“数据杂坛”公众号,点击“领资料”即可免费领取资料书籍。
3、需要论文指导或商务合作,点击“联系我”添加作者微信直接交流。
原文链接: