# PubMed crawler
# Last published: 2022-08-25 13:56:36
# Views:
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Module-level accumulator for scraped articles; get_pubmed() appends one
# [title, url, abstract] row per search hit.
df = pd.DataFrame(
    columns=[
        'title',
        'url',
        'abstract',
    ],
)
def get_pubmed(keyword, page, *, timeout=30):
    """Scrape one page of PubMed search results into the module-level ``df``.

    For every search hit on the page, the title and article URL are printed,
    the article page is fetched, its abstract text is printed, and a
    ``[title, url, abstract]`` row is appended to ``df``.

    Args:
        keyword: Search term, sent as the ``term`` query parameter.
        page: Results page number, sent as the ``page`` query parameter.
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error instead of blocking indefinitely.

    Returns:
        True if the results page contained at least one hit, False otherwise.
    """
    url = 'https://pubmed.ncbi.nlm.nih.gov'
    # Let requests build the query string so that keywords containing
    # spaces, '&', '#', etc. are percent-encoded instead of corrupting the URL.
    rep = requests.get(
        f'{url}/',
        params={'term': keyword, 'page': page},
        timeout=timeout,
    )
    html = BeautifulSoup(rep.text, features='html.parser')
    hits = html.find_all(class_='docsum-title')
    if not hits:
        return False

    for item in hits:
        title = item.text.strip()
        article_url = url + item['href']
        print(title)
        print(article_url)

        rep_content = requests.get(article_url, timeout=timeout)
        html_content = BeautifulSoup(rep_content.text, features='html.parser')
        # If the article page has no element with class 'abstract-content',
        # record an empty abstract; indexing [0] on the empty result list
        # used to raise IndexError and abort the whole page.
        abstract_node = html_content.find(class_='abstract-content')
        abstract = abstract_node.text if abstract_node is not None else ''
        print(abstract)

        # Append as the next integer-labelled row of the shared DataFrame.
        df.loc[len(df.index)] = [title, article_url, abstract]
    return True
# Crawl results page 1 only for the "metagenomic" query; the range is
# written with the page numbers that get_pubmed() actually receives.
for page_number in range(1, 2):
    get_pubmed("metagenomic", page_number)