pandas 操作 excel 备忘
2022-11-12 tech python pandas 16 mins 5677 字
以下链接是我边搜边写 pandas 代码的过程,记录一下。写完手头需求之后基本也懂了。
总结性内容:
- 总结了这67个pandas函数,完美解决数据处理,拿来即用!
- Pandas必会的方法汇总,数据分析必备!
- Pandas官网中文手册:https://www.pypandas.cn/docs/getting_started/10min.html
- https://github.com/yeayee/joyful-pandas
- pandas 教程
细节操作:
- Pandas DataFrame数据的增、删、改、查
- pandas 增加汇总行
- pandas添加汇总行、汇总列
- pandas 查询筛选数据
- 使用pandas读取excel
- pandas中DataFrame 数据合并,连接(merge,join,concat)
-
dataframe 计算行数:How do I get the row count of a Pandas DataFrame?
-
筛选行isin:Use a list of values to select rows from a Pandas dataframe
-
重置索引
df.reset_index(drop=True)
-
按时间对比筛选行:filter/select rows of pandas dataframe by timestamp column
-
从dict创建dataframe:创建Pandas DataFrame的6种方法
-
行列转换:Pandas行列转换的4大技巧
-
写入excel:pandas ExcelWriter用法及代码示例
-
Reset_index不生效 Pandas reset index is not taking effect duplicate
- index 从1开始: In Python pandas, start row index from 1 instead of zero without creating additional column
我的test代码:
import pandas as pd
import yaml
import datetime
class Project():
def __init__(self,name):
self.name = name
@staticmethod
def columns():
return {
'name':'项目名称',
'requestCount':'需求总数',
"requestDoneCount":'需求完成数',
'percentRequestCount':'需求完成率',
'requestKpiCount':"一个月前需求总数",
"requestKpiDoneCount":'一个月前需求完成数',
'percentKpiRequestCount':'一个月前需求完成率',
'director':'主责人',
}
def doneFlag():
return ("a","b","c","d","e")
def directorOverwrite():
return {
"xxx平台":"无",
}
def getProjectDict(df,now):
ft = df[df.主责中心 == "kelu.org中心"]
ftProjects = ft.groupby("blog")
datetimeNow=datetime.datetime.strptime(now,"%Y%m%d")
kpiTime=datetimeNow.replace(month=datetimeNow.month - 1).strftime("%Y-%m-%d")
projectDict = {}
for mainSys, sysReqs in ftProjects:
project = Project(mainSys)
project.requestCount = sysReqs.shape[0]
sysReqsDone = sysReqs[sysReqs['状态'].isin(Project.doneFlag())]
project.requestDoneCount = sysReqsDone.shape[0]
if(project.requestCount == 0):
project.percentRequestCount = 1
else:
project.percentRequestCount = round((float(project.requestDoneCount) / project.requestCount), 4)
sysReqs['需求创建时间'] = pd.to_datetime(sysReqs['需求创建时间'])
sysReqsKpi = sysReqs[sysReqs['需求创建时间'] < kpiTime]
project.requestKpiCount = sysReqsKpi.shape[0]
sysReqsKpiDone = sysReqsKpi[sysReqsKpi['状态'].isin(Project.doneFlag())]
project.requestKpiDoneCount = sysReqsKpiDone.shape[0]
if(project.requestKpiCount == 0):
project.percentKpiRequestCount = 1
else:
project.percentKpiRequestCount = round((float(project.requestKpiDoneCount) / project.requestKpiCount), 4)
if mainSys in Project.directorOverwrite():
project.director = Project.directorOverwrite()[mainSys]
else:
for _, reqData in sysReqs.iterrows():
project.director = reqData.主责人
break
project.reqs = sysReqs
projectDict[mainSys] = vars(project)
# print(type(projectDict[mainSys]))
return projectDict
def exportMainSheet(projectDict):
df=pd.DataFrame(projectDict).transpose()
newDf=df[[
'name',
'requestCount',
"requestDoneCount",
'percentRequestCount',
'requestKpiCount',
'requestKpiDoneCount',
'percentKpiRequestCount',
'director'
]].sort_values(by=['percentKpiRequestCount','requestKpiCount'],ascending=False)
# 处理输出内容
newDf.reset_index(drop=True, inplace=True)
newDf.index = newDf.index + 1
newDf.loc['合计'] = newDf[['requestCount',"requestDoneCount",'requestKpiCount','requestKpiDoneCount']].sum()
newDf.loc['合计','percentRequestCount'] = round((float(newDf.loc['合计','requestDoneCount']) / newDf.loc['合计','requestCount']), 4)
newDf.loc['合计','percentKpiRequestCount'] = round((float(newDf.loc['合计','requestKpiDoneCount']) / newDf.loc['合计','requestKpiCount']), 4)
newDf[u'percentRequestCount']= newDf['percentRequestCount'].apply(lambda x:format(x, '.2%'))
newDf[u'percentKpiRequestCount']= newDf['percentKpiRequestCount'].apply(lambda x:format(x, '.2%'))
newDf.rename(columns=Project.columns(),inplace=True)
return newDf
def main():
now='20221115'
inputFilename='input'+now+'.xlsx'
outputFilename='output'+now+'.xlsx'
df = pd.read_excel(inputFilename,sheet_name='需求列表')
projectDict = getProjectDict(df,now)
# print(yaml.dump(projectDict,allow_unicode=True))
with pd.ExcelWriter(outputFilename) as writer:
exportMainSheet(projectDict).to_excel(writer, sheet_name="汇总")
directorDict = {}
for projectItem in projectDict.values():
if(projectItem['percentRequestCount'] < 1):
sysReqs = projectItem['reqs']
reqsNotDone = sysReqs[~sysReqs['状态'].isin(Project.doneFlag())]
if projectItem['director'] in directorDict:
reqsNotDone = pd.concat([reqsNotDone,directorDict[projectItem['director']]])
directorDict[projectItem['director']] = reqsNotDone
for director, reqsNotDone in directorDict.items():
reqsNotDone.reset_index(drop=True, inplace=True)
reqsNotDone.index = reqsNotDone.index + 1
reqsNotDone.to_excel(writer, sheet_name=director)
if __name__=="__main__":
main()