使用递归的方法自动分析嵌套json的结构,获得pd.json_normalize函数中的record_path和meta参数,然后将嵌套json转为pd.DataFrame。
class json_nor():
    """Flatten nested JSON into a DataFrame with inferred json_normalize arguments.

    One sample record is walked recursively: every key path that ends in a
    list becomes a ``record_path`` candidate and every key path that ends in
    a scalar becomes a ``meta`` entry.  The inferred arguments are then
    passed to ``pd.json_normalize``.

    Attributes:
        data: The JSON-like object given to the constructor.
        arg_record: Record paths found by the most recent analysis.
        arg_meta: Meta paths found by the most recent analysis.
        nor_data: DataFrame produced by the most recent ``run`` call.
    """

    def __init__(self, data=None):
        """Store the data and start with empty analysis results.

        Args:
            data (dict or list, optional): Parsed JSON, either one record or
                a list of records. Defaults to None.
        """
        self.data = data
        self.arg_record = []
        self.arg_meta = []
        self.nor_data = None

    @staticmethod
    def get_dict_allkeys(value, key=None, arg_record=None, arg_meta=None):
        """Collect the record_path and meta arguments for json_normalize.

        Dicts are descended into recursively.  A list reached through at
        least one key is recorded in ``arg_record`` and a scalar in
        ``arg_meta``.  A path of one key is stored as that key; a longer
        path is stored as a list of keys.  A list or scalar at the root
        (empty ``key``) has no path and is not recorded.

        Args:
            value: The JSON fragment to inspect.
            key (list, optional): Key path leading to ``value``.
                Defaults to None (the root).
            arg_record (list, optional): Accumulator for record paths; a new
                list is created when omitted. Defaults to None.
            arg_meta (list, optional): Accumulator for meta paths; a new
                list is created when omitted. Defaults to None.

        Returns:
            tuple: ``(arg_record, arg_meta)``, the same list objects that
            were passed in when they were supplied.
        """
        if key is None:
            key = []
        if arg_record is None:
            arg_record = []
        if arg_meta is None:
            arg_meta = []

        if isinstance(value, dict):
            for key_sub, value_sub in value.items():
                # Build a fresh path per child so siblings never share a list.
                json_nor.get_dict_allkeys(
                    value_sub,
                    key=[*key, key_sub],
                    arg_record=arg_record,
                    arg_meta=arg_meta,
                )
        elif key:
            path = key[0] if len(key) == 1 else key
            target = arg_record if isinstance(value, list) else arg_meta
            target.append(path)
        return arg_record, arg_meta

    @staticmethod
    def _sample_record(data):
        """Return the object whose structure should be analysed.

        For a list/tuple of records this is the first element (an empty
        sequence yields an empty dict); anything else is returned unchanged.
        """
        if isinstance(data, (list, tuple)):
            return data[0] if data else {}
        return data

    @staticmethod
    def _record_prefix(path, sep):
        """Build the column prefix for a record path, ending with ``sep``."""
        label = sep.join(map(str, path)) if isinstance(path, list) else path
        return f'{label}{sep}'

    def run(self, meta_prefix='meta', sep='->', arg_data=None, nor_data=None, errors='raise'):
        """Infer the json_normalize arguments and flatten the data.

        Args:
            meta_prefix (str, optional): Prefix for meta columns; ``sep`` is
                appended to it. Defaults to 'meta'.
            sep (str, optional): Separator used for nested column names and
                for both prefixes. Defaults to '->'.
            arg_data (dict or list, optional): Data used to infer the
                arguments; for a list, its first element is analysed.
                Defaults to None, meaning ``self.data``.
            nor_data (dict or list, optional): Data to flatten.
                Defaults to None, meaning ``self.data``.
            errors (str, optional): 'raise' or 'ignore', forwarded to
                ``pd.json_normalize``. Defaults to 'raise'.

        Returns:
            DataFrame: The flattened data, also stored in ``self.nor_data``.
        """
        if arg_data is None:
            arg_data = self.data
        if nor_data is None:
            nor_data = self.data

        # Start from fresh lists so repeated calls do not accumulate entries.
        self.arg_record, self.arg_meta = json_nor.get_dict_allkeys(
            json_nor._sample_record(arg_data)
        )

        if not self.arg_record:
            self.nor_data = pd.json_normalize(nor_data, sep=sep, errors=errors)
            return self.nor_data

        first_path, *other_paths = self.arg_record
        frame = pd.json_normalize(
            nor_data,
            record_path=first_path,
            meta=self.arg_meta,
            record_prefix=json_nor._record_prefix(first_path, sep),
            meta_prefix=f'{meta_prefix}{sep}',
            sep=sep,
            errors=errors,
        )
        # Every further record path is flattened on its own (without meta)
        # and placed to the left of the frame built so far.
        for path in other_paths:
            extra = pd.json_normalize(
                nor_data,
                record_path=path,
                record_prefix=json_nor._record_prefix(path, sep),
                sep=sep,
                errors=errors,
            )
            frame = pd.concat([extra, frame], axis=1)

        self.nor_data = frame
        return self.nor_data

    def get_arg(self):
        """Infer and return the record_path and meta arguments.

        When ``self.data`` is a list of records its first element is
        analysed; otherwise ``self.data`` itself is analysed.  Results from
        earlier calls are replaced, not extended.

        Returns:
            tuple: ``(arg_record, arg_meta)``
        """
        self.arg_record, self.arg_meta = json_nor.get_dict_allkeys(
            json_nor._sample_record(self.data)
        )
        return self.arg_record, self.arg_meta