解析报表模板文件获取模板的数据集和SQL语句,并保存为Excel
完整代码
import json
import pandas as pd
from lxml import etree
import os
result_li = []
for root, dirs, files in os.walk(".\\reportlets", topdown=False):
for name in files:
abs_name = os.path.join(os.path.abspath(root), name)
# fineReport .cpt .frm 本身是xml文件,故可直接按行匹配字符查找是否与对象有关
if abs_name[-4:] not in [".cpt", ".frm"]:
continue
# 打开.cpt .frm
with open(abs_name, mode="rb") as fr:
xslt_content = fr.read()
xml_root = etree.XML(xslt_content)
tabel_data_el = xml_root.xpath("//*/TableDataMap//TableData[@class='com.fr.data.impl.DBTableData']")
for db_data in tabel_data_el:
# 数据集名称
date_source_name = db_data.get("name")
# 转 string
string = etree.tostring(db_data, encoding='utf-8').decode('utf-8')
data_source = etree.XML(string)
conn = data_source.xpath("/TableData/Connection/DatabaseName/text()")
conn_collect = []
for j in conn:
# 数据链接名字
conn_collect.append(str(j).strip())
conn_string = "".join(conn_collect)
query = data_source.xpath("/TableData/Query/text()")
query_collect = []
for j in query:
# 数据集SQL
query_collect.append(str(j).strip())
query_string = "".join(query_collect)
data_source_json = {
"report": abs_name,
"data_source": date_source_name,
"conn": conn_string,
"query": query_string
}
result_li.append(data_source_json)
print(json.dumps(data_source_json, ensure_ascii=False))
print("-----------------------------------")
df = pd.DataFrame(result_li)
df.to_excel("./data_source.xlsx", engine="openpyxl", index=False)
|