# stringr supplies the str_match_all() calls used throughout this script.
library(stringr)

# Read the raw goal markup; readLines() yields one character element per
# line of the file.
d <- readLines("~cr173/Sta523/data/world_cup_goals_raw.txt")

# Peek at the first record: a single <div> tag whose attributes (id, pid,
# pname, pteam, ...) hold the fields to be extracted.
d[1]
## [1] "<div id=\"g1\" pig=\"92\" class=\"goal cmr \" pid=\"346\" mid=\"1\" pname=\"Joel Matip\" pteam=\"Cameroon\" ptime=\"26\"  paddtime=\"0\" ptype=\"goal\" pteamimg=\"flags/cmr.png\" pmatch=\"1\" style=\"left: 505px; top: 22px;\"></div>"

# Keep that first record as `t`, the test string for the patterns below.
t <- d[1]
t
## [1] "<div id=\"g1\" pig=\"92\" class=\"goal cmr \" pid=\"346\" mid=\"1\" pname=\"Joel Matip\" pteam=\"Cameroon\" ptime=\"26\"  paddtime=\"0\" ptype=\"goal\" pteamimg=\"flags/cmr.png\" pmatch=\"1\" style=\"left: 505px; top: 22px;\"></div>"
# Build up a pattern for the `id` attribute one refinement at a time.

# A bare "id=" is too loose: it also hits the tails of `pid=` and `mid=`.
str_match_all(string = t, pattern = "id=")
## [[1]]
##      [,1] 
## [1,] "id="
## [2,] "id="
## [3,] "id="

# Requiring a leading space leaves only the stand-alone `id` attribute.
str_match_all(string = t, pattern = " id=")
## [[1]]
##      [,1]  
## [1,] " id="

# Adding the two quotes with nothing between them only matches an empty
# value, so nothing is found.
str_match_all(string = t, pattern = " id=\"\"")
## [[1]]
## character(0)

# `.*` is greedy: it runs on to the last double quote in the string.
str_match_all(string = t, pattern = " id=\".*\"")
## [[1]]
##      [,1]                                                                                                                                                                                                                           
## [1,] " id=\"g1\" pig=\"92\" class=\"goal cmr \" pid=\"346\" mid=\"1\" pname=\"Joel Matip\" pteam=\"Cameroon\" ptime=\"26\"  paddtime=\"0\" ptype=\"goal\" pteamimg=\"flags/cmr.png\" pmatch=\"1\" style=\"left: 505px; top: 22px;\""

# Limiting the value to lowercase letters and digits stops the match at the
# attribute's own closing quote.
str_match_all(string = t, pattern = " id=\"[a-z0-9]*\"")
## [[1]]
##      [,1]        
## [1,] " id=\"g1\""

# Parentheses add a capture group; the captured value is the second column.
str_match_all(string = t, pattern = " id=\"([a-z0-9]*)\"")
## [[1]]
##      [,1]         [,2]
## [1,] " id=\"g1\"" "g1"

# The call is vectorised over its input: one match matrix per string.
str_match_all(string = d[1:3], pattern = " id=\"([a-z0-9]*)\"")
## [[1]]
##      [,1]         [,2]
## [1,] " id=\"g1\"" "g1"
## 
## [[2]]
##      [,1]         [,2]
## [1,] " id=\"g2\"" "g2"
## 
## [[3]]
##      [,1]         [,2]
## [1,] " id=\"g3\"" "g3"
# Same step-by-step approach for the `class` attribute, whose value contains
# lowercase letters and spaces ("goal cmr ").

# A character class without a quantifier matches exactly one character, so a
# multi-character value cannot match.
str_match_all(t, " class=\"[a-z ]\"")
## [[1]]
## character(0)

# Escaping the space does not help: the quantifier is still missing.
# The backslash must be doubled in the R string literal; a lone "\ " is not a
# valid R escape sequence and stops the script from parsing.
str_match_all(t, " class=\"[a-z\\ ]\"")
## [[1]]
## character(0)

# With `*` the class repeats and the whole attribute is matched.
str_match_all(t, " class=\"[a-z ]*\"")
## [[1]]
##      [,1]                  
## [1,] " class=\"goal cmr \""

# A capture group returns the attribute value alone in the second column.
str_match_all(t, " class=\"([a-z ]*)\"")
## [[1]]
##      [,1]                   [,2]       
## [1,] " class=\"goal cmr \"" "goal cmr "