提取 HTTP 响应包中的 JSON 数据并输出为 CSV
python
from glob import glob
import json
import csv
import traceback
def read_file(name):
    """Return the entire contents of the file at *name*, decoded as UTF-8."""
    with open(name, mode='r', encoding='utf-8') as source:
        text = source.read()
    return text
def get_json_from_html(html):
    """Parse the JSON payload out of *html* and return the decoded object.

    When the text contains ``HTTP/1.1`` it is treated as a raw HTTP message:
    everything up to and including the first blank line (the end of the
    headers) is discarded and the remainder is parsed as JSON.  Both
    ``\\n\\n`` and ``\\r\\n\\r\\n`` are accepted as the header terminator.
    Text without that marker, or without a blank line, is parsed as-is.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    if 'HTTP/1.1' in html:
        # Locate the earliest header terminator of either style.  Cutting once
        # at that point keeps the whole body, even when the body itself
        # contains blank lines.
        candidates = [(html.find(sep), sep) for sep in ('\r\n\r\n', '\n\n')]
        found = [(pos, sep) for pos, sep in candidates if pos != -1]
        if found:
            pos, sep = min(found)
            html = html[pos + len(sep):]
    return json.loads(html)
def gen_html():
    """Yield the ``['data']['content']`` value of every file matching ``data/*/*``.

    Each matched path is read with ``read_file`` and decoded with
    ``get_json_from_html``.  A path that cannot be read, decoded, or that
    lacks the expected keys yields an empty list instead, so exactly one item
    is produced per matched path.
    """
    # glob() returns paths in an unspecified order; sorting makes the batches
    # built from this generator reproducible between runs.
    for item in sorted(glob("data/*/*")):
        try:
            data = get_json_from_html(read_file(item))['data']['content']
        except (OSError, ValueError, LookupError, TypeError):
            # OSError: directory or unreadable file.
            # ValueError: undecodable text or invalid JSON.
            # LookupError / TypeError: payload lacks the expected structure.
            data = []
        yield data
def save_contents(name, contents):
    """Write *contents* (an iterable of dict rows) to the CSV file *name*.

    The header is the union of all row keys in first-seen order, and every
    value is written under its own key, so rows whose keys differ from (or
    are ordered differently than) the first row stay aligned.  Keys missing
    from a row are written as empty fields.  All fields are quoted.  An empty
    *contents* produces an empty file.  A ``write <name>`` line is printed
    once the file has been written.
    """
    rows = list(contents)  # materialise: the rows are traversed twice
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    with open(name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=fieldnames,
            restval='',
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_ALL,
        )
        if rows:
            writer.writeheader()
            writer.writerows(rows)
    print('write', name)
# Number of source files whose rows are merged into a single CSV file.
BATCH_SIZE = 100

page = 0       # index of the first source file in the current batch
count = 0      # source files accumulated in the current batch
contents = []  # rows collected for the current batch
for jdata in gen_html():
    try:
        contents.extend(jdata)
        count += 1
        # ">=" rather than "==": if an earlier flush failed, count is already
        # past the threshold and the flush must still be retried.
        if count >= BATCH_SIZE:
            save_contents(f'file_{page}_{page+count}.csv', contents)
            page = page + count
            contents = []
            count = 0
    except Exception:
        # Best effort: report the problem and carry on with the next file.
        traceback.print_exc()
# Flush whatever is left over.  Skipped when no rows are pending so that no
# empty CSV file is created.
if contents:
    save_contents(f'file_{page}_{page+count}.csv', contents)