import requests
import pandas as pd
from bs4 import BeautifulSoup

# Collected results: one row per article (title, article URL, abstract text).
df = pd.DataFrame(columns=['title', 'url', 'abstract'])


def get_pubmed(keyword, page):
    # Fetch one page of PubMed search results for the given keyword.
    url = 'https://pubmed.ncbi.nlm.nih.gov'
    rep = requests.get(f'{url}/?term={keyword}&page={page}')
    html = BeautifulSoup(rep.text, features='html.parser')
    # Each search hit's title link carries the class 'docsum-title'.
    li = html.find_all(class_='docsum-title')
    if not li:
        # No results on this page; signal the caller to stop paging.
        return False
    for item in li:
        title = item.text.strip()
        article_url = url + item['href']
        print(title)
        print(article_url)
        # Fetch the article page and pull the abstract, if one is present.
        rep_content = requests.get(article_url)
        html_content = BeautifulSoup(rep_content.text, features='html.parser')
        abstract = html_content.find_all(class_='abstract-content')
        # Some articles have no abstract; fall back to an empty string
        # instead of raising an IndexError.
        abstract_text = abstract[0].text.strip() if abstract else ''
        print(abstract_text)
        df.loc[len(df.index)] = [title, article_url, abstract_text]
    return True


# Crawl only the first results page; widen the range to fetch more pages.
for page in range(1):
    get_pubmed('metagenomic', page + 1)
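
# A minimal sketch for persisting the scraped rows after the crawl finishes.
# The output file name 'pubmed_results.csv' is an assumption for illustration,
# not part of the original script; any path accepted by pandas works.
df.to_csv('pubmed_results.csv', index=False, encoding='utf-8')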