# One-time setup (intentionally left commented out):
# install.packages("devtools")
# library(devtools)
# install_github('yikeshu0611/rs.driver')
# Install the R packages
# install.packages('RSelenium')
# java -Dwebdriver.chrome.driver="chromedriver.exe" -jar selenium-server-standalone-4.0.0-alpha-2.jar -port 4444
# Load the R packages
# library(devtools)
library(readxl)
library(RSelenium)
library(rvest)
library(xml2)
library(RS.Driver)
# plyr is attached before tidyverse so that dplyr's verbs are not masked by
# plyr's same-named functions; nothing below calls plyr directly.
library(plyr)
library(tidyverse)

# Purpose ----
# For every compound name in the first sheet of "example_tempelate.xlsx"
# (column `Compound Name`):
#   1. search mzCloud and collect the hit titles and their reference IDs;
#   2. open the mzCloud reference page of every hit whose title equals the
#      compound name and read the page title plus the two table cells that
#      are stored below as `InChiKeys` and `SMILES`;
#   3. query PubChem with each InChIKey and read the text at a fixed XPath,
#      stored as `Pubchem_ID`.
# The combined result is the data frame `total`.

# Settings and helpers ----

# Seconds to pause after each navigation (presumably to let the page finish
# rendering before it is read).
search_wait_secs <- 2
page_wait_secs <- 2.5

# Prefix every element of `values` with `prefix`. Returns character(0) for
# empty input; a bare paste0() call would return the lone prefix instead.
build_urls <- function(prefix, values) {
  if (length(values) == 0L) {
    return(character(0))
  }
  paste0(prefix, values)
}

# Read the text found at `xpath` on the currently open page. A failed lookup
# is reported as a warning and yields character(0) rather than aborting the
# whole scraping run.
# NOTE(review): assumes RS.get_Text() returns a character vector with one
# element per matched node -- confirm against the RS.Driver documentation.
read_page_text <- function(xpath) {
  tryCatch(
    as.character(RS.get_Text(xpath)),
    error = function(e) {
      warning(
        "Could not read ", xpath, ": ", conditionMessage(e),
        call. = FALSE
      )
      character(0)
    }
  )
}

# Like read_page_text(), but always returns exactly one value: the first
# match, or NA when nothing was found. Keeps per-page results aligned.
read_first_text <- function(xpath) {
  found <- read_page_text(xpath)
  if (length(found) == 0L) NA_character_ else found[[1L]]
}

# Step 1: search mzCloud by compound name ----

name <- read_xlsx("example_tempelate.xlsx", sheet = 1)
stopifnot("Compound Name" %in% names(name))

# Drop missing/blank names: an NA name would otherwise be searched as the
# literal text "NA" and later act as an NA row index.
id <- as.character(name$`Compound Name`)
id <- id[!is.na(id) & nzchar(id)]

RS.OpenChrome()

# Percent-encode the names so characters such as '+', '&', '#' or spaces do
# not corrupt the query string.
encoded_id <- vapply(
  id, utils::URLencode, character(1),
  reserved = TRUE, USE.NAMES = FALSE
)
url <- build_urls("https://www.mzcloud.org/compound/Search?Query=", encoded_id)

# One list slot per search page; a page may yield several hits (or none).
search_titles <- vector("list", length(url))
search_ids <- vector("list", length(url))
for (i in seq_along(url)) {
  RS.open_Url(url[i])
  Sys.sleep(search_wait_secs)
  # `x[i] <- list(value)` keeps the slot even when `value` has length zero.
  search_titles[i] <- list(read_page_text(
    "/html/body/div/div[2]/div/div//div[2]/div/div[2]/div/div[1]/a"
  ))
  search_ids[i] <- list(read_page_text(
    "/html/body/div/div[2]/div/div/div[*]/div[2]/div/div[2]/div/div[2]/p/span"
  ))
}

# Titles and IDs are paired by position, so a page that returned different
# numbers of each cannot be paired reliably and is left out.
aligned <- lengths(search_titles) == lengths(search_ids)
if (!all(aligned)) {
  warning(
    "Skipping searches with unequal numbers of titles and IDs: ",
    paste(id[!aligned], collapse = ", "),
    call. = FALSE
  )
}
title <- as.character(unlist(search_titles[aligned]))
ID <- as.character(unlist(search_ids[aligned]))

df <- data.frame(title = title, ID = ID, stringsAsFactors = FALSE)
df <- df[!duplicated(df$ID), , drop = FALSE]

# Keep only hits whose title is exactly one of the requested names, in the
# order of the requested names.
hits_per_name <- lapply(id, function(compound) {
  df[!is.na(df$title) & df$title == compound, , drop = FALSE]
})
# The zero-row seed keeps the result a data frame when there are no hits.
hits <- do.call(rbind, c(list(df[0, , drop = FALSE]), hits_per_name))

# The scraped ID text carries an "ID: Reference" label; strip it so only the
# reference identifier remains.
df4 <- data.frame(
  name = hits$title,
  id = trimws(gsub("ID: Reference", "", hits$ID, fixed = TRUE)),
  stringsAsFactors = FALSE
)

# Step 2: read each mzCloud reference page ----

id2 <- df4$id
url2 <- build_urls("https://www.mzcloud.org/compound/Reference/", id2)

# NOTE(review): a new browser session is opened for each stage, as in the
# original script; earlier sessions are not closed here.
RS.OpenChrome()

Key <- rep(NA_character_, length(url2))
SMILES <- rep(NA_character_, length(url2))
title <- rep(NA_character_, length(url2)) # now the reference-page titles
for (i in seq_along(url2)) {
  RS.open_Url(url2[i])
  Sys.sleep(page_wait_secs)
  Key[i] <- read_first_text(
    "/html/body/div/div[7]/div/div/table/tbody/tr[2]/td[2]"
  )
  SMILES[i] <- read_first_text(
    "/html/body/div/div[7]/div/div/table/tbody/tr[3]/td[2]"
  )
  title[i] <- read_first_text("/html/body/div/div[1]/div/h3")
}

title_Key_SMILES <- data.frame(
  Compound_Name = title,
  InChiKeys = Key,
  SMILES = SMILES,
  stringsAsFactors = FALSE
)

# Step 3: look up each InChIKey on PubChem ----

RS.OpenChrome()

url3 <- build_urls("https://pubchem.ncbi.nlm.nih.gov/#query=", Key)

Pubchem_ID <- rep(NA_character_, length(url3))
for (i in seq_along(url3)) {
  if (is.na(Key[i])) {
    next # no key was scraped for this row, so there is nothing to query
  }
  RS.open_Url(url3[i])
  Sys.sleep(page_wait_secs)
  Pubchem_ID[i] <- read_first_text(
    "/html/body/div[1]/div/div/main/div[2]/div[2]/div[3]/div/div/div/div[2]/ul/li/div/div/div[1]/div[2]/div[2]/div/span/a/span/span"
  )
}

Pubchem_ID <- data.frame(Pubchem_ID = Pubchem_ID, stringsAsFactors = FALSE)
total <- cbind(title_Key_SMILES, Pubchem_ID)
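# "No comments yet" -- stray non-code text (apparently copied along with the script); kept as a comment so it is not evaluated.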