This does it! I spent some time adapting your code to extract observation fields as well (and adjusting it to my particular needs); this is what I ended up with, in case anyone else wants this code (or if you want to make any comments/improvements). Thank you so much!
library(jsonlite)
library(data.table)
### Build the annotation lookup table
# Download the controlled terms and flatten the nested result to a data.frame.
a <- fromJSON("https://api.inaturalist.org/v1/controlled_terms")
a <- flatten(a$results)
# For every controlled term, pair the term's id/label with the id/label of
# each of its values; `a$values` holds one data.frame of values per term.
l <- unname(Map(
  function(term_id, term_label, term_values) {
    cbind(
      idann = term_id,
      labelann = term_label,
      term_values[, c("id", "label")]
    )
  },
  a$id,
  a$label,
  a$values
))
# Stack the per-term pieces into a single lookup table and print it.
ann <- do.call("rbind", l)
ann
### Observation request
# NOTE(review): the query sends no paging parameters, so presumably only the
# API's default first page of results comes back -- confirm that is enough.
url <- "https://api.inaturalist.org/v1/observations?quality_grade=any&identifications=any&taxon_id=1195336"
# Download the JSON response and flatten the nested `results` element so that
# nested fields become dotted columns (e.g. "taxon.name").
x <- flatten(fromJSON(url)$results)
# Observation-level columns carried through the annotation expansion; "ofvs"
# is kept because the observation fields are extracted from it afterwards.
keep <- c("id", "observed_on", "taxon.name", "location", "uri", "ofvs")
### Expand nested list-columns (annotations, then observation fields)

# Expand one nested list-column into long format.
#
# Arguments:
#   df      data.frame with one row per parent record.
#   nested  list with one element per row of `df`; an element is either a
#           data.frame of child records or something else (presumably the
#           empty placeholder produced when a record has no children).
#   fields  character vector: child columns to extract.
#   keep    character vector: columns of `df` to carry along.
#
# Returns a data.frame holding the `keep` columns repeated once per child
# record, followed by the `fields` columns. A parent whose element is not a
# data.frame, has no rows, or lacks any of `fields` contributes exactly one
# row with NA in every field, so no parent record is ever dropped.
expand_nested <- function(df, nested, fields, keep) {
  pieces <- lapply(seq_along(nested), function(i) {
    child <- nested[[i]]
    usable <- is.data.frame(child) &&
      nrow(child) > 0 &&
      all(fields %in% names(child))
    if (usable) {
      ans <- child[, fields, drop = FALSE]
    } else {
      # Placeholder row of NAs so the parent survives the expansion.
      ans <- data.frame(matrix(NA, nrow = 1, ncol = length(fields)))
      names(ans) <- fields
    }
    # Repeat the parent row once per child record and bind the two together.
    parent <- df[i, keep, drop = FALSE]
    cbind(parent[rep(1, nrow(ans)), , drop = FALSE], ans)
  })
  do.call("rbind", pieces)
}

### Extract annotations if any
# One row per (observation, annotation); NA ids when there are none.
vals <- expand_nested(
  x,
  x$annotations,
  fields = c("controlled_attribute_id", "controlled_value_id"),
  keep = keep
)

# From here on the annotation ids are carried along as well.
keep <-
  c("id", "observed_on", "taxon.name", "location", "uri", "ofvs",
    "controlled_attribute_id", "controlled_value_id") # values to keep

### Extract observation fields if any
# One row per (observation, annotation, observation field); NA name/value
# when the observation has no observation fields.
of <- expand_nested(
  vals,
  vals$ofvs,
  fields = c("name", "value"),
  keep = keep
)
# obs <- merge(obs, of)
## Attach annotation labels to the expanded observations
# Left join: every row of `of` is kept; rows whose attribute/value ids have no
# match in the `ann` lookup get NA for `labelann` and `label`.
obs <- merge(
  x = of,
  y = ann,
  by.x = c("controlled_attribute_id", "controlled_value_id"),
  by.y = c("idann", "id"),
  all.x = TRUE
)
# Sort by observation id.
obs <- obs[order(obs[["id"]]), ]
### Long -> wide: one column per annotation attribute
# Each annotation attribute (`labelann`) becomes a column; when an observation
# carries several values for the same attribute their labels are joined with
# "; ". Observation-field name/value pairs stay in long form for now.
setDT(obs) # convert to data.table by reference so data.table's dcast is used
obs <- dcast(
  obs,
  id + uri + observed_on + location + taxon.name + name + value ~ labelann,
  value.var = "label",
  fun.aggregate = function(labels) paste(labels, collapse = "; ")
)
# Replace spaces in the new column names with underscores.
names(obs) <- chartr(" ", "_", names(obs))
### Long -> wide: one column per observation field
# No-op when `obs` is already a data.table; kept so this step also works if
# `obs` was converted back to a data.frame in between.
setDT(obs)
# Identifier columns that define one output row per observation.
id_cols <- c("id", "uri", "observed_on", "location", "taxon.name")
# Annotation columns exist only for attributes that actually occur in the
# fetched data, so include just the ones that are present: naming an absent
# column in the formula would make dcast fail.
annotation_cols <- intersect(
  c("Alive_or_Dead", "Evidence_of_Presence", "Life_Stage", "Sex"),
  names(obs)
)
cast_formula <- as.formula(
  paste(paste(c(id_cols, annotation_cols), collapse = " + "), "~ name")
)
# Each observation-field name becomes a column holding its value(s); several
# values for the same field are joined with "; ".
obs <- dcast(
  obs,
  cast_formula,
  value.var = "value",
  fun.aggregate = function(i) {
    paste(i, collapse = "; ")
  }
)
names(obs) <- gsub(" ", "_", names(obs)) # remove spaces from column names
### Final column selection and clean-up
# Columns reported in the final table; the Gall_* columns are expected to be
# observation-field columns created by the preceding cast on `name`.
wanted <- c(
  "id", "observed_on", "taxon.name", "location", "uri",
  "Evidence_of_Presence", "Life_Stage",
  "Gall_generation", "Gall_phenophase"
)
obs <- obs[, wanted, with = FALSE]
# Drop observations whose Gall_generation field is empty or missing.
obs <- obs[!is.na(Gall_generation) & Gall_generation != ""]
# Split "lat,lng" into two character columns using data.table/base only (the
# script loads neither tidyr nor magrittr). Missing or incomplete locations
# yield NA for the missing part(s).
coords <- strsplit(as.character(obs$location), ",", fixed = TRUE)
obs[, Latitude := vapply(coords, function(p) p[1], character(1))]
obs[, Longitude := vapply(coords, function(p) p[2], character(1))]
obs[, location := NULL]
# Put the coordinates where `location` used to be; remaining columns follow
# in their existing order.
setcolorder(obs, c("id", "observed_on", "taxon.name", "Latitude", "Longitude"))
obs # this can be converted back to a df with as.data.frame