if int(IntegerNum) > 0:#获取整型部分转换 zeroCount = 0 IntLen = len(IntegerNum) for i in range(0,IntLen): n = IntegerNum[i]#整数部分字符串的第i个字符 p = IntLen - i - 1 q = p // 4#地板除,p<4时,为0,4>p<8时,等于1,等于8时,等于2 m = p % 4#求余数,p<4时,为p,=4时为0 if n == "0": zeroCount +=1 else: if zeroCount > 0: ChineseStr += cnNums[0] zeroCount = 0 #归零 ChineseStr += cnNums[int(n)] + cnIntRadice[m] if m == 0 and zeroCount ChineseStr += cnIntUnits[q] ChineseStr += cnIntLast#整型部分处理完毕
if DecimalNum != "":#小数部分 decLen = len(DecimalNum) for i in range(0,decLen): n = DecimalNum[i] if n != "0": ChineseStr += cnNums[int(n)] + cnDecUnits[i] if ChineseStr == "":
ChineseStr += cnNums[0] + cnIntLast + cnInteger elif DecimalNum == "0": ChineseStr += cnInteger ChineseStr = Symbol + ChineseStr return ChineseStr
numtomoney(5648.89)
数据提取和数据处理部分
def get_data(file_path): pdf_path=Path(file_path) pdf_files=pdf_path.glob('*.pdf') for pdf_file in pdf_files: df_list=[] pdf = pdfplumber.open(path_or_fp=pdf_file) for page in pdf.pages: table=page.extract_table()#提取page对象中的表格数据为列表 df=pd.DataFrame(table)#把列表数据存入DataFrame中 df_list.append(df) df_total=pd.concat(df_list)#合并表格 df_total=df_total.reset_index(drop=True)#重置索引,并删除原索引 df_total.columns=df_total.iloc[0,:].tolist()#用第一行作为表头 df_total.drop(index=0,inplace=True)#删除第一行 df_total=df_total.reindex(columns=df_total.columns[[0,1,3,2,6,5,7,4]])#调整列顺序 df_total.drop(columns=['配置要求'],inplace=True)#删除配置要求列
df_total.columns=['序号','名称','规格及型号','品牌','单位','数量','单价(元)'] df_total['数量']=pd.to_numeric(df_total['数量'],errors='coerce') df_total['单价(元)']=pd.to_numeric(df_total['单价(元)'],errors='coerce') df_total['合价(元)']=df_total['数量']*df_total['单价(元)'] df_total['存放位置']=pd.NA df_total.drop(index=df_total.index.tolist()[-2:],inplace=True)#删除最后两行数据 df_total=df_total.applymap(lambda x : x.replace('\n','') if isinstance(x,str) else x)#去除单元格中的换行 total=df_total['合价(元)'].sum(axis=0) money=numtomoney(total) row_num=df_total.shape[0] df_total.loc[row_num+2,'序号']=f'{money}(¥{"%.2f" % total})'#2位小数 df_total.loc[row_num+3,'序号']='采购申请人签字:' df_total.loc[row_num+4,'序号']='实训室管理员签字:' p=Path(Path.cwd()) p1=p/'已提取文件' if not p1.exists():#判断文件夹是否存在,不存在就新建文件夹 p1.mkdir(exist_ok=True) df_total.to_excel(f'./已提取文件/{pdf_file.stem}.xlsx',index=False)
设置表格样式
def modify_style(wb,first_row_value,second_row_value,place,file_name): ''' first_row_value 第一行标题内容 second_row_value 第二行标题内容 place,file 表格中'存放位置'列内容 file_name 最终保存的文件名称 ''' for sheet_name in wb.sheetnames: ws = wb[sheet_name] ws.insert_rows(1,2)#在第一行前面插入2行空行 alignment=Alignment(horizontal="center", vertical="center",wrap_text=True) ws['A1'].value=first_row_value ws['A1'].font=Font(name='宋体', size=16, bold=True, color='FF000000') ws['A1'].alignment=alignment ws['A2'].value=second_row_value ws['A2'].font=Font(name='宋体', size=11, bold=True, color='FF000000') ws['A2'].alignment=alignment ws.merge_cells('A1:I1')#合并单元格 ws.merge_cells('A2:I2') maxrows = ws.max_row # 获取最大行 ws.column_dimensions["B"].width = 24 # 设置B列宽度为15 for col in ['C','D','G','H']: ws.column_dimensions[col].width = 12 # 批量设置指定列宽度为12 for col in ['A','E','F','I']: ws.column_dimensions[col].width = 4.5 # 批量设置指定列宽度为4.5 for i in range(3,maxrows+1): cells = ws[i] font = Font(name='宋体', size=11, bold=False, italic=False, color=
'FF000000') alignment = Alignment(horizontal="center", vertical="center",wrap_text=True) # 先定好side的格式 side_left = Side(style='thin', color='FF000000') side_right = Side(style='thin', color='FF000000') # 代入边线中 border = Border(left=side_left, right=side_right, top=side_right, bottom=side_left) for cell in cells: cell.font = font cell.alignment = alignment cell.border = border ws['A1'].font=Font(size=16, bold=True) ws['A2'].font=Font(bold=True) ws['A2'].alignment= Alignment(horizontal="center", vertical="center",wrap_text=True) ws.row_dimensions[2].height=32#设置第二行行高 ws.row_dimensions[maxrows-1].height=28#设置倒数第二行行高 ws.row_dimensions[maxrows].height=28#设置倒数第一行行高 for i in range(3): ws.merge_cells(f'A{maxrows-2+i}:H{maxrows-2+i}') ws[f'A{maxrows-2+i}'].alignment=Alignment(horizontal="left", vertical="center",wrap_text=True) ws['I4'].value=place for col in ['G','H']:#设置金钱符号 for i in range(4,maxrows+1): ws[f'{col}{i}'].number_format='¥#,##0.00;¥-#,##0.00' ws.merge_cells(f'I4:I{maxrows}')
wb.save('已完成-'+f'{file_name}.xlsx') file_path=Path(Path.cwd()/'已提取文件') files=file_path.glob('*.xlsx') for file in files: wb=load_workbook(file) file_name=file.stem modify_style(wb,'绍兴市柯桥区职业教育中心耗材入库单','项目名称:柯桥区职业教育中心建筑专业实训耗材采购项目\n入库时间:2022年9月29日 ','1号实训楼2楼库房',file_name)