Python-将多层 API JSON 解析为 CSV 的问题

Posted

技术标签:

【中文标题】Python-将多层 API JSON 解析为 CSV 的问题【英文标题】:Python- Issue parsing multi-layered API JSON into CSV 【发布时间】:2022-01-01 13:44:24 【问题描述】:

我正在尝试解析 NIH 授权 API 并遇到了一个复杂的分层问题。在下面的JSON 输出中,我已经能够导航到包含我想要的所有字段的"results" 部分,除了一些在另一个字典中分层。我要做的是让"full_study_section""organization""project_num_split" 中的JSON 数据与"appl_id""contact_pi_name""fiscal_year" 位于同一层。这个post 很有帮助,但我不太确定如何通过迭代来调整图层。


"meta":
    "limit":25,
    "offset":0,
    "properties":,
    "search_id":null,
    "sort_field":"project_start_date",
    "sort_order":"desc",
    "sorted_by_relevance":false,
    "total":78665
,
"results":[
    
        "appl_id":10314644,
        "contact_pi_name":"BROCATO, EMILY ROSE",
        "fiscal_year":2021,
        "full_study_section":
            "group_code":"32",
            "name":"Special Emphasis Panel[ZAA1 GG (32)]",
            "sra_designator_code":"GG",
            "sra_flex_code":"",
            "srg_code":"ZAA1",
            "srg_flex":""
        ,
        "organization":
            "city":null,
            "country":null,
            "dept_type":"PHARMACOLOGY",
            "external_org_id":353201,
            "fips_country_code":null,
            "org_city":"RICHMOND",
            "org_country":"UNITED STATES",
            "org_duns":[
                "105300446"
            ],
            "org_fips":"US",
            "org_ipf_code":"353201",
            "org_name":"VIRGINIA COMMONWEALTH UNIVERSITY",
            "org_state":"VA",
            "org_state_name":null,
            "org_zipcode":"232980568"
        ,
        "project_end_date":null,
        "project_num":"1F31AA029259-01A1",
        "project_num_split":
            "activity_code":"F31",
            "appl_type_code":"1",
            "full_support_year":"01A1",
            "ic_code":"AA",
            "serial_num":"029259",
            "suffix_code":"A1",
            "support_year":"01"
        ,
        "project_start_date":"2022-03-07T05:00:00Z",
        "subproject_id":null
    ,

代码:

import requests
import json
import csv


params = 
     "criteria":
     
       "fiscal_years":[2021]
     ,
     "include_fields": [
        "ApplId","ContactPiName","FiscalYear",
        "OrgCountry","AllText",
        "FullStudySection","Organization","ProjectEndDate",
        "ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
     ],
     "offset":0,
     "limit":25,
     "sort_field":"project_start_date",
     "sort_order":"desc"
 
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)

#print(response.status_code)

#print(response.text)
     
resdecode = json.loads(response.text)

#print(json.dumps(resdecode, sort_keys=True, indent=4, separators=(',', ':')))

data = resdecode["results"]

#print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ':')))

pns = resdecode["results"][0]["project_num_split"]

#print(json.dumps(pns, sort_keys=True, indent=4, separators=(',', ':')))

# for item in data:
#     appl_id = item.get("appl_id")
#     print(appl_id)        
    
writerr = csv.writer(open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline = ''))   
    
count = 0

for row in resdecode:

      if count == 0:

              header = resdecode.keys()

              writerr.writerow(header)

              count += 1

      writerr.writerow(row)

writerr.close()

【问题讨论】:

你想得到什么输出? @rv.kvetch 我要做的是让“full_study_section”、“organization”和“project_num_split”中的 JSON 数据与“appl_id”、“contact_pi_name”位于同一层"、"财政年度"等。以便 CSV 文件具有相同级别的这些标头。 【参考方案1】:

为了将 full_study_section、organization 和 project_num_split 下的项目移动到与 appl_id、contact_pi_name 和 finance_year 相同的级别,您必须遍历每个结果并为这三个 dicts 重新创建这些键值对,然后删除 full_study_section,组织和 project_num_split 键一旦完成。下面的代码应该可以按预期工作。

import requests
import json
import csv


params = 
     "criteria":
     
       "fiscal_years":[2021]
     ,
     "include_fields": [
        "ApplId","ContactPiName","FiscalYear",
        "OrgCountry","AllText",
        "FullStudySection","Organization","ProjectEndDate",
        "ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
     ],
     "offset":0,
     "limit":25,
     "sort_field":"project_start_date",
     "sort_order":"desc"
 
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)

    
resdecode = json.loads(response.text)
data = resdecode["results"]


for item in data:
    x = ["full_study_section","organization","project_num_split"]
    for i in x:
        for key, value in item[i].items():
            item[key] = value
        del item[i]

with open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline = '') as f:
    writer = csv.writer(f)
    count = 0
    for row in data:
        if count == 0:
            header = row.keys()
            writer.writerow(header)
            count =+ 1
        writer.writerow(row.values())

【讨论】:

【参考方案2】:

您可以将项目移动到所需的级别并删除字典。

import json
import pprint

pp = pprint

file = open("test.json")
jsonData = json.load(file)

full_study_section = jsonData['results'][0]['full_study_section']
organization = jsonData['results'][0]['organization']
project_num_split = jsonData['results'][0]['project_num_split']

jsonData['results'][0].update(full_study_section)
jsonData['results'][0].update(project_num_split)
jsonData['results'][0].update(organization)

jsonData['results'][0].pop('full_study_section')
jsonData['results'][0].pop('project_num_split')
jsonData['results'][0].pop('organization')

pp.pprint(jsonData)

输出:

u'meta': u'limit': 25,
           u'offset': 0,
           u'properties': ,
           u'search_id': None,
           u'sort_field': u'project_start_date',
           u'sort_order': u'desc',
           u'sorted_by_relevance': False,
           u'total': 78665,
 u'results': [u'activity_code': u'F31',
               u'appl_id': 10314644,
               u'appl_type_code': u'1',
               u'city': None,
               u'contact_pi_name': u'BROCATO, EMILY ROSE',
               u'country': None,
               u'dept_type': u'PHARMACOLOGY',
               u'external_org_id': 353201,
               u'fips_country_code': None,
               u'fiscal_year': 2021,
               u'full_support_year': u'01A1',
               u'group_code': u'32',
               u'ic_code': u'AA',
               u'name': u'Special Emphasis Panel[ZAA1 GG (32)]',
               u'org_city': u'RICHMOND',
               u'org_country': u'UNITED STATES',
               u'org_duns': [u'105300446'],
               u'org_fips': u'US',
               u'org_ipf_code': u'353201',
               u'org_name': u'VIRGINIA COMMONWEALTH UNIVERSITY',
               u'org_state': u'VA',
               u'org_state_name': None,
               u'org_zipcode': u'232980568',
               u'project_end_date': None,
               u'project_num': u'1F31AA029259-01A1',
               u'project_start_date': u'2022-03-07T05:00:00Z',
               u'serial_num': u'029259',
               u'sra_designator_code': u'GG',
               u'sra_flex_code': u'',
               u'srg_code': u'ZAA1',
               u'srg_flex': u'',
               u'subproject_id': None,
               u'suffix_code': u'A1',
               u'support_year': u'01']

【讨论】:

以上是关于Python-将多层 API JSON 解析为 CSV 的问题的主要内容,如果未能解决你的问题,请参考以下文章

如何用Python解析多层嵌套的JSON?

python json快速解析命令

c#中怎么解析多层json数据

python中jsonpath模块,解析多层嵌套的json数据

反序列化多层嵌套json实体

在没有 API 的情况下将 JSON 解析为 python [重复]