I scraped the iNaturalist taxon framework relationship (TFR) data: https://drive.google.com/file/d/1trKrpGYV2m37yKeCkwz30xDq3IDiInH8/view?usp=sharing
import csv
import io
import time

import pandas as pd
import requests
# Scrape the iNaturalist taxon-framework-relationship listing for taxon 47903
# and write one CSV row per HTML table found on each results page.
#
# Output file: cactus_tfrs.csv with the columns
#   "match type", "iNat taxa", "POWO taxa"
# Multiple taxa within one cell are joined with ';'.

# Listing URL; the page number is appended to it.
TFR_URL = "https://www.inaturalist.org/taxon_framework_relationships?taxon_id=47903&page="
FIRST_PAGE = 1
LAST_PAGE = 64  # inclusive; the page count is hard-coded for this listing
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever

with open('cactus_tfrs.csv', encoding='utf-8', mode='w', newline='') as fileout:
    csvwriter = csv.writer(fileout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csvwriter.writerow(["match type", "iNat taxa", "POWO taxa"])

    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        time.sleep(1)  # pause one second between requests
        try:
            response = requests.get(f"{TFR_URL}{page}", timeout=REQUEST_TIMEOUT)
            # Treat HTTP error responses as failures instead of parsing
            # whatever HTML the error page happens to contain.
            response.raise_for_status()

            # Parse every <table> on the page into a DataFrame. The markup is
            # wrapped in StringIO because newer pandas releases no longer
            # accept a literal HTML string here.
            tables = pd.read_html(io.StringIO(response.text))

            for table in tables:
                # Match type sits in the first row of the second column; the
                # iNat taxa are in the first column and the POWO taxa in the
                # third. Cells are converted with str() so a non-string value
                # cannot make the join fail.
                match_type = table.iloc[0, 1]
                inat_taxa = ';'.join(map(str, table.iloc[:, 0].dropna()))
                powo_taxa = ';'.join(map(str, table.iloc[:, 2].dropna()))
                csvwriter.writerow([match_type, inat_taxa, powo_taxa])
        except Exception as err:
            # Best effort: report the failing page and its cause, then move on
            # to the next page. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt / SystemExit stop the run.
            print(f"error on page {page}: {err!r}")
