
利用 API 提取文本 | 53
output_json = response.json()
if 'next' in response.links:
next_url = response.links['next']['url']
if next_url is not None:
output_json += get_all_pages(next_url, params, headers)
return output_json
# Fetch all pages of issue comments for the pytorch/pytorch repository.
# NOTE: the GitHub endpoint names its ordering parameter `sort`; the key
# `sorted` is not a recognised parameter, and `direction` is ignored unless
# `sort` is supplied, so the descending order only applies with this key.
out = get_all_pages(
    "https://api.github.com/repos/pytorch/pytorch/issues/comments",
    params={
        'since': '2020-07-01T10:00:01Z',
        'sort': 'created',
        'direction': 'desc',
    },
    headers={'Accept': 'application/vnd.github.v3+json'},
)

# Load the combined JSON records into a DataFrame for inspection.
df = pd.DataFrame(out)
print(df['body'].count())  # number of non-null comment bodies
df[['id', 'created_at', 'body']].sample(1)  # display one randomly chosen row
输出结果:
3870
             id            created_at body
2176  286601372  2017-03-15T00:09:46Z  ...