經過一番研究後程式碼如下:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(XML)
library(RCurl)
library(httr)

# Scrape the Taiwan 50 index constituents table from the TWSE web site and
# save it as "TWN50composition.csv" in the current working directory.

# Switch every locale category to "C" before scraping (kept from the
# original script).
Sys.setlocale(category = "LC_ALL", locale = "C")

## Generate the URL of the page to scrape.
url <- "http://www.twse.com.tw/ch/trading/indices/twco/tai50i.php"

# The page contains Chinese text, so fetch and parse it as Big5.
get_url_parse <- htmlParse(url, encoding = "BIG5")

# Extract the key fields: the values we need are the <td> cells inside the
# <tr> rows carrying class="tb2".
tablehead <- xpathSApply(get_url_parse, "//tr[@class='tb2']/td", xmlValue)

# Each constituent row is expected to contribute exactly this many cells.
n_cols <- 6L

# Fail loudly instead of letting matrix() recycle values (with only a
# warning) when the page layout changed or nothing matched the XPath query.
if (length(tablehead) == 0L || length(tablehead) %% n_cols != 0L) {
  stop(
    "Expected a non-empty multiple of ", n_cols, " table cells, got ",
    length(tablehead), "; the page layout may have changed.",
    call. = FALSE
  )
}

# Convert the extracted text to CP950, the built-in encoding of (Traditional
# Chinese) Windows XP.
# NOTE(review): the text returned by xpathSApply() appears to be UTF-8 rather
# than Big5 (observation from the original author) -- confirm on the target
# platform.
tablehead <- iconv(tablehead, "UTF-8", "CP950")

# iconv() yields NA for strings it cannot convert; report that rather than
# silently writing NA cells to the output file.
if (anyNA(tablehead)) {
  warning(
    sum(is.na(tablehead)), " cell(s) could not be converted to CP950.",
    call. = FALSE
  )
}

# Reshape the flat vector of cells into an easy-to-read matrix, one
# constituent per row. The name `table` is kept for compatibility with any
# later code that reads it, even though it masks base::table as a variable.
table <- matrix(tablehead, ncol = n_cols, byrow = TRUE)

# Write the Taiwan 50 constituents to a csv file.
write.csv(table, file = "TWN50composition.csv", row.names = FALSE)
程式執行所產生的結果如下:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"V1","V2","V3","V4","V5","V6" | |
"3474","華亞科","9576","5,093,622,000","50.00%","0.83%" | |
"4938","和碩","9572","2,290,304,935","74.00%","1.03%" | |
"3481","群創","9572","9,385,527,616","98.00%","1.04%" | |
"2330","台積電","9576","25,753,417,412","93.00%","25.82%" | |
"2303","聯電","9576","12,706,314,290","94.00%","1.34%" | |
"2882","國泰金","8575","12,606,238,526","63.00%","2.67%" | |
"2357","華碩","9572","752,760,280","95.00%","1.65%" | |
"1303","南亞","1353","7,930,821,589","72.00%","2.77%" | |
"2883","開發金","8775","15,172,996,640","93.00%","1.09%" | |
"1301","台塑","1353","6,365,673,217","77.00%","2.66%" | |
"2002","中鋼","1757","15,733,113,947","79.00%","2.29%" | |
"2311","日月光","9576","7,810,454,946","79.00%","1.87%" | |
"2317","鴻海","2733","14,581,787,562","87.00%","7.64%" | |
"1402","遠東新","3763","5,247,916,886","75.00%","0.90%" | |
"2892","第一金","8355","9,259,254,819","79.00%","0.96%" | |
"2880","華南金","8355","9,327,970,100","75.00%","0.88%" | |
"2801","彰銀","8355","6,121,980,625","52.00%","0.41%" | |
"1216","統一","3577","5,463,476,316","85.00%","1.73%" | |
"1101","台泥","2353","3,692,175,869","87.00%","0.96%" | |
"1102","亞泥","2353","3,360,379,285","69.00%","0.63%" | |
"2382","廣達","9572","3,832,574,432","69.00%","1.48%" | |
"2308","台達電","2737","2,417,141,304","84.00%","2.98%" | |
"1326","台化","1353","5,861,186,297","75.00%","2.19%" | |
"2886","兆豐金","8355","12,484,346,574","80.00%","1.75%" | |
"2891","中信金","8355","15,257,281,448","97.00%","2.16%" | |
"2325","矽品","9576","3,116,361,139","95.00%","1.16%" | |
"2105","正新","3357","3,241,414,671","57.00%","0.99%" | |
"2395","研華","9572","628,272,477","54.00%","0.58%" | |
"2408","南科","9576","2,396,100,810","15.00%","0.20%" | |
"2412","中華電","6535","7,757,446,545","49.00%","2.62%" | |
"2409","友達","9572","9,435,154,398","92.00%","1.01%" | |
"2207","和泰車","5379","546,179,184","75.00%","1.39%" | |
"2301","光寶科","2733","2,330,795,827","91.00%","0.58%" | |
"9904","寶成","3765","2,941,665,922","90.00%","0.86%" | |
"2912","統一超","5337","1,039,622,256","54.00%","1.00%" | |
"2354","鴻準","2737","1,357,569,968","78.00%","0.59%" | |
"2474","可成","2757","750,691,371","96.00%","1.47%" | |
"3045","台灣大","6575","3,420,832,827","49.00%","1.29%" |