diff --git a/.idea/compiler.xml b/.idea/compiler.xml
new file mode 100644
index 0000000..c8fd901
--- /dev/null
+++ b/.idea/compiler.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000..b26911b
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_github_abola_crawler_1_1_1.xml b/.idea/libraries/Maven__com_github_abola_crawler_1_1_1.xml
new file mode 100644
index 0000000..61069f6
--- /dev/null
+++ b/.idea/libraries/Maven__com_github_abola_crawler_1_1_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_google_guava_guava_19_0.xml b/.idea/libraries/Maven__com_google_guava_guava_19_0.xml
new file mode 100644
index 0000000..68e23cc
--- /dev/null
+++ b/.idea/libraries/Maven__com_google_guava_guava_19_0.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_googlecode_juniversalchardet_juniversalchardet_1_0_3.xml b/.idea/libraries/Maven__com_googlecode_juniversalchardet_juniversalchardet_1_0_3.xml
new file mode 100644
index 0000000..b127443
--- /dev/null
+++ b/.idea/libraries/Maven__com_googlecode_juniversalchardet_juniversalchardet_1_0_3.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_mashape_unirest_unirest_java_1_4_9.xml b/.idea/libraries/Maven__com_mashape_unirest_unirest_java_1_4_9.xml
new file mode 100644
index 0000000..d7792cc
--- /dev/null
+++ b/.idea/libraries/Maven__com_mashape_unirest_unirest_java_1_4_9.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_codec_commons_codec_1_2.xml b/.idea/libraries/Maven__commons_codec_commons_codec_1_2.xml
new file mode 100644
index 0000000..fbcb992
--- /dev/null
+++ b/.idea/libraries/Maven__commons_codec_commons_codec_1_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_1.xml b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_1.xml
new file mode 100644
index 0000000..66e6537
--- /dev/null
+++ b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_io_commons_io_2_5.xml b/.idea/libraries/Maven__commons_io_commons_io_2_5.xml
new file mode 100644
index 0000000..67c2ad2
--- /dev/null
+++ b/.idea/libraries/Maven__commons_io_commons_io_2_5.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_logging_commons_logging_1_2.xml b/.idea/libraries/Maven__commons_logging_commons_logging_1_2.xml
new file mode 100644
index 0000000..eab40b3
--- /dev/null
+++ b/.idea/libraries/Maven__commons_logging_commons_logging_1_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_commons_commons_compress_1_12.xml b/.idea/libraries/Maven__org_apache_commons_commons_compress_1_12.xml
new file mode 100644
index 0000000..d28b2ea
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_commons_commons_compress_1_12.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_commons_commons_vfs2_2_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_vfs2_2_1.xml
new file mode 100644
index 0000000..eff22b6
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_commons_commons_vfs2_2_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_httpcomponents_httpasyncclient_4_1_1.xml b/.idea/libraries/Maven__org_apache_httpcomponents_httpasyncclient_4_1_1.xml
new file mode 100644
index 0000000..8484ecb
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_httpcomponents_httpasyncclient_4_1_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_httpcomponents_httpclient_4_5_2.xml b/.idea/libraries/Maven__org_apache_httpcomponents_httpclient_4_5_2.xml
new file mode 100644
index 0000000..fdb7ead
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_httpcomponents_httpclient_4_5_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_4_4_4.xml b/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_4_4_4.xml
new file mode 100644
index 0000000..3a5aa19
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_4_4_4.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_nio_4_4_4.xml b/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_nio_4_4_4.xml
new file mode 100644
index 0000000..1dab39e
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_nio_4_4_4.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_httpcomponents_httpmime_4_5_2.xml b/.idea/libraries/Maven__org_apache_httpcomponents_httpmime_4_5_2.xml
new file mode 100644
index 0000000..4b2025a
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_httpcomponents_httpmime_4_5_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_json_json_20160212.xml b/.idea/libraries/Maven__org_json_json_20160212.xml
new file mode 100644
index 0000000..44a278c
--- /dev/null
+++ b/.idea/libraries/Maven__org_json_json_20160212.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_jsoup_jsoup_1_9_2.xml b/.idea/libraries/Maven__org_jsoup_jsoup_1_9_2.xml
new file mode 100644
index 0000000..f38a2fc
--- /dev/null
+++ b/.idea/libraries/Maven__org_jsoup_jsoup_1_9_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_mongodb_bson_2_13_3.xml b/.idea/libraries/Maven__org_mongodb_bson_2_13_3.xml
new file mode 100644
index 0000000..9eed368
--- /dev/null
+++ b/.idea/libraries/Maven__org_mongodb_bson_2_13_3.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_mongodb_mongo_java_driver_2_13_3.xml b/.idea/libraries/Maven__org_mongodb_mongo_java_driver_2_13_3.xml
new file mode 100644
index 0000000..d375251
--- /dev/null
+++ b/.idea/libraries/Maven__org_mongodb_mongo_java_driver_2_13_3.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..aca9be3
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..f83b4e0
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/MyCrawlerExample.iml b/MyCrawlerExample.iml
new file mode 100644
index 0000000..f3b83fd
--- /dev/null
+++ b/MyCrawlerExample.iml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/crawler/example/BasicExample.java b/src/crawler/example/BasicExample.java
index cbf327f..d53bfa2 100644
--- a/src/crawler/example/BasicExample.java
+++ b/src/crawler/example/BasicExample.java
@@ -20,7 +20,7 @@ public static void main(String[] args) {
CrawlerPack.setLoggerLevel(SimpleLog.LOG_LEVEL_OFF);
// 遠端資料路徑
- String uri = "http://.../";
+ String uri = "https://tw.yahoo.com/";
System.out.println(
CrawlerPack.start()
@@ -30,13 +30,12 @@ public static void main(String[] args) {
//.setRemoteEncoding("big5")// 設定遠端資料文件編碼
// 選擇資料格式 (三選一)
- .getFromJson(uri)
- //.getFromHtml(uri)
+ //.getFromJson(uri)
+ .getFromHtml(uri)
//.getFromXml(uri)
// 這兒開始是 Jsoup Document 物件操作
- .select(".css .selector ")
-
+ .select(".Va-tt").get(0)
);
}
}
diff --git a/src/crawler/example/HighwayStaticInfo.java b/src/crawler/example/HighwayStaticInfo.java
index a19c42c..e860d1c 100644
--- a/src/crawler/example/HighwayStaticInfo.java
+++ b/src/crawler/example/HighwayStaticInfo.java
@@ -13,7 +13,7 @@ public class HighwayStaticInfo {
public static void main(String[] args) {
// 遠端資料路徑
- String uri = "gz:http://tisvcloud.freeway.gov.tw/cms_value.xml.gz";
+ String uri = "gz:http://tisvcloud.freeway.gov.tw/roadlevel_threshold.xml.gz";
System.out.println(
CrawlerPack.start()
diff --git a/src/crawler/example/PttExample.java b/src/crawler/example/PttExample.java
new file mode 100644
index 0000000..3a0037b
--- /dev/null
+++ b/src/crawler/example/PttExample.java
@@ -0,0 +1,76 @@
+package crawler.example;
+
+import com.github.abola.crawler.CrawlerPack;
+import org.apache.commons.logging.impl.SimpleLog;
+import org.jsoup.nodes.Document;
+
+/**
+ * Fetches one PTT Gossiping article and prints its body text.
+ *
+ * <p>The page is loaded through CrawlerPack with the {@code over18} cookie set,
+ * every {@code div} and {@code span} under {@code #main-content} is removed,
+ * and the text that is left is written to standard output.
+ *
+ * <p>The article URL is hard-coded in {@code main}; edit it there to fetch a
+ * different post.
+ *
+ * @author Abola Lee
+ */
+public class PttExample {
+
+    /**
+     * Downloads the article, strips the nested blocks and prints the rest.
+     *
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+
+        // set to debug level
+        //CrawlerPack.setLoggerLevel(SimpleLog.LOG_LEVEL_DEBUG);
+
+        // turn off logging
+        CrawlerPack.setLoggerLevel(SimpleLog.LOG_LEVEL_OFF);
+
+        // Remote resource to fetch.
+        String uri = "https://www.ptt.cc/bbs/Gossiping/M.1524893176.A.DED.html";
+
+        // Keep the parsed Document in a variable so that every select() below
+        // starts from the whole page.
+        //
+        // The earlier form chained
+        //     .select(".main-content div").remove()
+        //     .select(".main-content span").remove()
+        //     .select("#main-content").text()
+        // directly onto getFromHtml(). jsoup's Elements.remove() returns the
+        // Elements it was called on, so each later select() searched inside the
+        // elements that had just been detached instead of the page. The first
+        // two selectors also used ".main-content" (a class selector) while the
+        // last one uses "#main-content" (an id selector).
+        Document jsoupObject =
+            CrawlerPack.start()
+                // Request settings.
+                .addCookie("over18", "1") // cookie sent with the request
+                //.setRemoteEncoding("big5") // encoding of the remote document
+
+                // Source format (choose one of the three).
+                //.getFromJson(uri)
+                .getFromHtml(uri);
+                //.getFromXml(uri)
+
+        // From here on this is plain jsoup Document manipulation:
+        // drop every div and span under #main-content ...
+        jsoupObject.select("#main-content div").remove();
+        jsoupObject.select("#main-content span").remove();
+
+        // ... and print the text that remains.
+        System.out.println(jsoupObject.select("#main-content").text());
+
+        // To dump the (already stripped) document itself instead:
+        //   System.out.println(jsoupObject);
+
+        // Other selectors that can be tried on the same document:
+        //   jsoupObject.select(".article-meta-value")
+        //   jsoupObject.select("#main-content div.push:contains(噓) .f3.push-content")
+        //   jsoupObject.select("#main-content ")
+    }
+}
diff --git a/src/crawler/example/PttGetContent.java b/src/crawler/example/PttGetContent.java
new file mode 100644
index 0000000..c24c9fd
--- /dev/null
+++ b/src/crawler/example/PttGetContent.java
@@ -0,0 +1,28 @@
+package crawler.example;
+
+import com.github.abola.crawler.CrawlerPack;
+import org.jsoup.nodes.Document;
+
+
+/**
+ * Simple exercise: print the body text of one PTT article.
+ *
+ * NOTE(review): an earlier description here said "find the ids of everyone who
+ * pushed in the article"; the code below only prints the #main-content text.
+ *
+ * @author Abola Lee
+ */
+public class PttGetContent {
+ // Entry point; args is not read.
+ public static void main(String[] args) {
+ String uri = "https://www.ptt.cc/bbs/Gossiping/M.1525278814.A.571.html";
+ // Fetch and parse the page, sending the over18 cookie with the request.
+ Document jsoupObject = CrawlerPack.start().addCookie("over18", "1").getFromHtml(uri);
+ // Detach every div and span under #main-content from the document.
+ jsoupObject.select("#main-content div").remove();
+ jsoupObject.select("#main-content span").remove();
+
+ // Print the text that is still inside #main-content.
+ System.out.println( jsoupObject.select("#main-content").text());
+ }
+}
diff --git a/src/crawler/example/RealPrice.java b/src/crawler/example/RealPrice.java
index d98f0df..ad51c2f 100644
--- a/src/crawler/example/RealPrice.java
+++ b/src/crawler/example/RealPrice.java
@@ -17,11 +17,15 @@
*/
public class RealPrice {
public static void main(String[] args) {
-
+
+ /*
String uri = "zip:http://plvr.land.moi.gov.tw"
+ "/Download?type=zip&fileName=lvr_landxml.zip"
+ "!/A_LVR_LAND_A.XML";
+ */
+ String uri = "zip:http://plvr.land.moi.gov.tw/Download?type=zip&fileName=lvr_landxml.zip"
+ + "!/B_LVR_LAND_A.XML";
Document jsoupDoc = CrawlerPack.start()
.getFromXml(uri);
diff --git a/src/crawler/example/bdse07/ExamGoogleMapApi.java b/src/crawler/example/bdse07/ExamGoogleMapApi.java
new file mode 100644
index 0000000..7c96fda
--- /dev/null
+++ b/src/crawler/example/bdse07/ExamGoogleMapApi.java
@@ -0,0 +1,30 @@
+package crawler.example.bdse07;
+
+import com.github.abola.crawler.CrawlerPack;
+import org.apache.commons.logging.impl.SimpleLog;
+
+/**
+ * Exercise: use the Google Directions API (navigation), complete the API call and print the distance.
+ *
+ * When finished, remember the two steps needed to upload your work:
+ * 1. git > add
+ * 2. git > commit file (do not forget to PUSH!)
+ */
+public class ExamGoogleMapApi {
+ // Entry point; args is not read.
+ public static void main(String[] args) {
+ CrawlerPack.setLoggerLevel(SimpleLog.LOG_LEVEL_OFF);
+ // NOTE(review): the API key is embedded in the URL literal below; consider keeping it out of source control.
+ // Remote resource (the query can be built in Postman first, then pasted here).
+ String uri = "https://maps.googleapis.com/maps/api/directions/json?origin=25.091896,121.518145&destination=25.033509,121.543516&key=AIzaSyCE3rhrAg9_Nuxr1i-lfwTnbZ48ECkc-9c";
+
+ // The select() expression below picks the distance text out of the fetched response.
+ String distance =
+ CrawlerPack.start()
+ .getFromJson(uri)
+ .select("legs > distance text")
+ .text();
+
+ System.out.println("result: " + distance);
+ }
+}
diff --git a/src/crawler/example/youtube/FullExampleIntergrationToELK.java b/src/crawler/example/youtube/FullExampleIntergrationToELK.java
new file mode 100644
index 0000000..ce4b70b
--- /dev/null
+++ b/src/crawler/example/youtube/FullExampleIntergrationToELK.java
@@ -0,0 +1,227 @@
+package crawler.example.youtube;
+
+import com.github.abola.crawler.CrawlerPack;
+import com.google.common.base.Joiner;
+import com.google.common.collect.HashBasedTable;
+import com.google.common.collect.Table;
+import com.mashape.unirest.http.Unirest;
+import org.apache.commons.logging.impl.SimpleLog;
+import org.json.JSONObject;
+import org.jsoup.nodes.Element;
+
+import java.util.*;
+
+/**
+ * Collects the videos of a YouTube user's or channel's channels, adds their statistics and posts each row to Elasticsearch.
+ */
+public class FullExampleIntergrationToELK {
+
+    static String elasticHost = "localhost" ;
+    static String elasticPort = "9200" ;
+    static String elasticIndex = "youtube-pewdata"; // append your account (ex: youtube-abola); must be all lowercase letters
+    static String elasticIndexType = "data"; // leave this line unchanged for the example
+
+    // Set either the user name or the channel id; username is used when both are set.
+    String username = "";
+    String channelId = "UCEf_Bc-KVd7onSeifS3py9g";
+    String api_key = "AIzaSyCE3rhrAg9_Nuxr1i-lfwTnbZ48ECkc-9c"; // NOTE(review): key committed in source; consider loading it from configuration
+
+    // Guava Table with one row per video id (created eagerly so it exists even if no channel is found):
+    // | row | column | value|
+    // |-----|--------|------|
+    // | id  | item1  | aaa  |
+    // | id  | item2  | bbb  |
+    // | id  | item3  | ccc  |
+    Table<String, String, String> videoTable = HashBasedTable.create();
+
+    /** Runs the pipeline: resolve channels, load their videos, add statistics, post every row to Elasticsearch. */
+    public FullExampleIntergrationToELK() throws Exception{
+        // Work out which channels to query.
+        List<String> channels = getChannels();
+
+        // Load the videos of every channel.
+        for(String channelId: channels ){
+            getVideos(channelId);
+        }
+
+        // Fill in the statistics of every collected video.
+        getVideoStatistics( videoTable.rowKeySet() );
+
+
+        // Write one JSON document per video to Elasticsearch.
+        for(String row: videoTable.rowKeySet()){
+            String elasticJson = new JSONObject(videoTable.row(row)).toString();
+            sendPost("http://" + elasticHost + ":" + elasticPort
+                    + "/" + elasticIndex + "/" + elasticIndexType
+                    , elasticJson);
+        }
+    }
+
+
+    /**
+     * Resolves the channel ids to process: the ones found for username when it is set, otherwise channelId.
+     * @return list of channel ids; empty when the username lookup yields no items
+     * @throws Exception when both username and channelId are empty
+     */
+    public List<String> getChannels() throws Exception{
+        List<String> channels = new ArrayList<>() ;
+
+        // A username is set: look its channels up.
+        if (!"".equals(username)){
+            // Ask the API for every channel that belongs to username.
+            String uri = "https://www.googleapis.com/youtube/v3/channels?forUsername=" + username + "&part=snippet,id&key=" + api_key;
+
+            for (Element elem : CrawlerPack.start().getFromJson(uri).select("items id")) {
+                //System.out.println(elem);
+                String channelId = elem.select("id").text();
+                //String channelTitle = elem.select("title").text();
+                channels.add(channelId);
+
+            }
+        }
+        // No username: fall back to the configured channelId.
+        else if(!"".equals(channelId)){
+            channels.add(channelId);
+        }
+        else{
+            throw new Exception("未輸入有效的username或channelId");
+        }
+
+        return channels;
+    }
+
+    /**
+     * Loads the video list of the given channel, starting from the first page.
+     * Results are stored in videoTable; nothing is returned.
+     * @param channelId id of the channel to read
+     */
+    public void getVideos(String channelId){
+        getVideos(channelId, "");
+    }
+
+    /**
+     * Loads one page of the channel's video list into videoTable, then recurses while a nextPageToken is present.
+     * @param channelId id of the channel to read
+     * @param pageToken page token to request, or "" for the first page
+     */
+    public void getVideos(String channelId, String pageToken){
+
+        // Defensive only: the field is initialised at its declaration, but it is not final.
+        if (null == videoTable) {
+            videoTable = HashBasedTable.create();
+        }
+
+        String uri = "https://www.googleapis.com/youtube/v3/search?channelId="+channelId+
+                "&fields=items(id(videoId),snippet(title,channelTitle)),nextPageToken" +
+                "&part=snippet&order=date&maxResults=50&key="+api_key;
+
+        // Add the page token when one was given.
+        if( !"".equals(pageToken) ){
+            uri += "&pageToken=" + pageToken;
+        }
+
+        Element results = null;
+        // Original author's note: past the last page there is no data and an IndexOutOfBoundsException is raised.
+        try {
+            results = CrawlerPack.start().getFromJson(uri);
+        }
+        catch(java.lang.IndexOutOfBoundsException outBounds){
+            return ;
+        }
+
+        for (Element elem : results.select("items")) {
+            String videoId = elem.select("id").text();
+            String title = elem.select("title").text();
+            String channelTitle = elem.select("channelTitle").text();
+
+            // Skip entries without an id.
+            if ("".equals(videoId)) continue;
+
+            videoTable.put(videoId, "videoid", videoId);
+            videoTable.put(videoId, "title", title);
+            videoTable.put(videoId, "channelTitle", channelTitle);
+
+        }
+
+
+        String nextPageToken = results.select("nextPageToken").text();
+        if ( !"".equals(nextPageToken) ){
+            // Continue with the next page.
+            getVideos(channelId, nextPageToken);
+        }
+    }
+
+    /**
+     * Looks up the statistics of every video, sending the ids in batches of 50 per request to speed things up.
+     *
+     * @param videos ids of the videos to look up
+     */
+    public void getVideoStatistics(Set<String> videos){
+        int idsLimitCounter = 50;
+        List<String> ids = new ArrayList<>();
+        // Collect the ids batch by batch.
+        for(String videoId: videos){
+            ids.add(videoId);
+            // Count down; the request is only sent once the batch is full.
+            idsLimitCounter--;
+            if ( 0 >= idsLimitCounter ){
+                // reset counter
+                idsLimitCounter = 50;
+                // Guava: join the collected ids into one comma-separated string.
+                getVideoStatistics( Joiner.on(",").join(ids) );
+                ids = new ArrayList<>();
+            }
+        }
+        if (0 < ids.size()) getVideoStatistics( Joiner.on(",").join(ids) );
+    }
+
+    /**
+     * Looks up the statistics of the given id(s) and writes them back into videoTable.
+     * @param ids one video id, or several separated by commas
+     */
+    public void getVideoStatistics(String ids){
+        System.out.println(ids);
+        String uri = "https://www.googleapis.com/youtube/v3/videos?id="+ids+
+                "&part=snippet,statistics&fields=items(id,snippet(publishedAt),statistics)"+
+                "&key="+api_key;
+
+        for (Element elem : CrawlerPack.start().getFromJson(uri).select("items")) {
+            String videoId = elem.select("id").text();
+            String publishedAt = elem.select("publishedAt").text();
+            String viewCount = elem.select("viewCount").text();
+            String likeCount = elem.select("likeCount").text();
+            String dislikeCount = elem.select("dislikeCount").text();
+            String commentCount = elem.select("commentCount").text();
+
+            videoTable.put(videoId, "publishedAt", publishedAt);
+            videoTable.put(videoId, "viewCount", viewCount);
+            videoTable.put(videoId, "likeCount", likeCount);
+            videoTable.put(videoId, "dislikeCount", dislikeCount);
+            videoTable.put(videoId, "commentCount", commentCount);
+        }
+    }
+
+
+    /** Posts body to url as JSON; returns the response body, or "Error:" plus the exception message on failure. */
+    String sendPost(String url, String body){
+        try{
+            return Unirest.post(url)
+                    .header("content-type", "application/json") // body is JSON; Elasticsearch 6+ rejects text/plain
+                    .header("cache-control", "no-cache")
+                    .body(body)
+                    .asString().getBody();
+
+        }catch(Exception e){return "Error:" + e.getMessage();}
+    }
+
+
+    /** Entry point: turns CrawlerPack logging off and runs the pipeline once; args is not read. */
+    public static void main(String[] args) {
+        CrawlerPack.setLoggerLevel(SimpleLog.LOG_LEVEL_OFF);
+
+        try {
+            new FullExampleIntergrationToELK();
+        }catch(Exception ex){
+            ex.printStackTrace();
+//            System.out.println(ex.getMessage());
+        }
+    }
+}