pandas 操作 excel 备忘

以下链接是我边搜边写 pandas 代码的过程,记录一下。写完手头需求之后基本也懂了。

总结性内容:

细节操作:

我的test代码:

import pandas as pd
import yaml
import datetime

class Project():
    def __init__(self,name):
        self.name = name

    @staticmethod
    def columns():
        return {
            'name':'项目名称',
            'requestCount':'需求总数',
            "requestDoneCount":'需求完成数', 
            'percentRequestCount':'需求完成率',
            'requestKpiCount':"一个月前需求总数",
            "requestKpiDoneCount":'一个月前需求完成数',
            'percentKpiRequestCount':'一个月前需求完成率',
            'director':'主责人',
        }

    def doneFlag():
        return ("a","b","c","d","e")
    def directorOverwrite():
        return {
            "xxx平台":"无",
        }

def getProjectDict(df,now):
    ft = df[df.主责中心 == "kelu.org中心"]
    ftProjects = ft.groupby("blog")
    datetimeNow=datetime.datetime.strptime(now,"%Y%m%d")
    kpiTime=datetimeNow.replace(month=datetimeNow.month - 1).strftime("%Y-%m-%d")

    projectDict = {}

    for mainSys, sysReqs in ftProjects:
        project = Project(mainSys)
        project.requestCount = sysReqs.shape[0]
        sysReqsDone = sysReqs[sysReqs['状态'].isin(Project.doneFlag())]
        project.requestDoneCount = sysReqsDone.shape[0]
        if(project.requestCount == 0):
            project.percentRequestCount = 1
        else:
            project.percentRequestCount = round((float(project.requestDoneCount) / project.requestCount), 4)

        sysReqs['需求创建时间'] = pd.to_datetime(sysReqs['需求创建时间'])
        sysReqsKpi = sysReqs[sysReqs['需求创建时间'] < kpiTime]
        project.requestKpiCount = sysReqsKpi.shape[0]
        sysReqsKpiDone = sysReqsKpi[sysReqsKpi['状态'].isin(Project.doneFlag())]
        project.requestKpiDoneCount = sysReqsKpiDone.shape[0]
        if(project.requestKpiCount == 0):
            project.percentKpiRequestCount = 1
        else:
            project.percentKpiRequestCount = round((float(project.requestKpiDoneCount) / project.requestKpiCount), 4)

        if mainSys in Project.directorOverwrite():
            project.director = Project.directorOverwrite()[mainSys]
        else:
            for _, reqData in sysReqs.iterrows():
                project.director = reqData.主责人
                break

        project.reqs = sysReqs

        projectDict[mainSys] = vars(project)
        # print(type(projectDict[mainSys]))

    return projectDict

def exportMainSheet(projectDict):
    df=pd.DataFrame(projectDict).transpose()
    newDf=df[[
        'name',
        'requestCount',
        "requestDoneCount",
        'percentRequestCount',
        'requestKpiCount',
        'requestKpiDoneCount',
        'percentKpiRequestCount',
        'director'
        ]].sort_values(by=['percentKpiRequestCount','requestKpiCount'],ascending=False)

    # 处理输出内容
    newDf.reset_index(drop=True, inplace=True)
    newDf.index = newDf.index + 1
    newDf.loc['合计'] = newDf[['requestCount',"requestDoneCount",'requestKpiCount','requestKpiDoneCount']].sum()
    newDf.loc['合计','percentRequestCount'] = round((float(newDf.loc['合计','requestDoneCount']) / newDf.loc['合计','requestCount']), 4)
    newDf.loc['合计','percentKpiRequestCount'] = round((float(newDf.loc['合计','requestKpiDoneCount']) / newDf.loc['合计','requestKpiCount']), 4)

    newDf[u'percentRequestCount']= newDf['percentRequestCount'].apply(lambda x:format(x, '.2%'))
    newDf[u'percentKpiRequestCount']= newDf['percentKpiRequestCount'].apply(lambda x:format(x, '.2%'))

    newDf.rename(columns=Project.columns(),inplace=True)

    return newDf

def main():
    now='20221115'
    inputFilename='input'+now+'.xlsx'
    outputFilename='output'+now+'.xlsx'

    df = pd.read_excel(inputFilename,sheet_name='需求列表')
    projectDict = getProjectDict(df,now)
    # print(yaml.dump(projectDict,allow_unicode=True))

    with pd.ExcelWriter(outputFilename) as writer:
        exportMainSheet(projectDict).to_excel(writer, sheet_name="汇总")
        directorDict = {}
        for projectItem in projectDict.values():
            if(projectItem['percentRequestCount'] < 1):
                sysReqs = projectItem['reqs']
                reqsNotDone = sysReqs[~sysReqs['状态'].isin(Project.doneFlag())]
                if projectItem['director'] in directorDict:
                    reqsNotDone = pd.concat([reqsNotDone,directorDict[projectItem['director']]])
                directorDict[projectItem['director']] = reqsNotDone

        for director, reqsNotDone in directorDict.items():
            reqsNotDone.reset_index(drop=True, inplace=True)
            reqsNotDone.index = reqsNotDone.index + 1
            reqsNotDone.to_excel(writer, sheet_name=director)

if __name__=="__main__":
    main()

iterm2 给台前调度Stage Manager留出空间 在 Mac M1 上安装 selenium