Compare commits

..

No commits in common. "8ef75ec1c1a4d78d1f07bbf04fce932cd9ecc54a" and "a54ec047ff8539d184e731a8fa5ae3dbf424dce3" have entirely different histories.

4 changed files with 16 additions and 591 deletions

View File

@ -1,565 +0,0 @@
client:
version: v3
address: "172.16.20.2:9669,172.16.20.4:9669,172.16.20.5:9669"
user: root
password: 123456
concurrencyPerAddress: 10
reconnectInitialInterval: 1s
retry: 3
retryInitialInterval: 1s
manager:
spaceName: Y24_1206
batch: 128
readerConcurrency: 50
importerConcurrency: 512
statsInterval: 10s
hooks:
before:
- statements:
- |
USE Y24_1206;
wait: 10s
log:
level: INFO
console: true
sources:
- path: /data/y4/tools/data/user-0s.csv
csv:
delimiter: "|"
withHeader: true
tags:
- name: user
mode: INSERT
id:
type: "STRING"
index: 12 # the id column is used as the VID
props:
- name: "friends_count"
type: "INT"
index: 0
- name: "listed_count"
type: "INT"
index: 1
- name: "favourites_count"
type: "INT"
index: 2
- name: "verified"
type: "STRING"
index: 3
- name: "label"
type: "STRING"
index: 4
- name: "platform"
type: "STRING"
index: 5
- name: "url"
type: "STRING"
index: 6
- name: "gather_time"
type: "STRING"
index: 7
- name: "screen_name"
type: "STRING"
index: 8
- name: "followers_count"
type: "INT"
index: 9
- name: "name"
type: "STRING"
index: 10
- name: "standpoint"
type: "STRING"
index: 11
- name: "position"
type: "STRING"
index: 13
- name: "region"
type: "STRING"
index: 14
- name: "register_location"
type: "STRING"
index: 15
- name: "party"
type: "STRING"
index: 16
- path: /data/y4/tools/data/post-0s.csv
csv:
delimiter: "|"
withHeader: true
tags:
- name: post
mode: INSERT
id:
type: "STRING"
index: 11 # the id column is used as the VID
props:
- name: "comment_count"
type: "INT"
index: 0
- name: "author_name"
type: "STRING"
index: 1
- name: "repost_count"
type: "INT"
index: 2
- name: "like_count"
type: "INT"
index: 3
- name: "keywords"
type: "STRING"
index: 4
- name: "platform"
type: "STRING"
index: 5
- name: "url"
type: "STRING"
index: 6
- name: "gather_time"
type: "STRING"
index: 7
- name: "post_relationship"
type: "STRING"
index: 8
- name: "publish_time"
type: "STRING"
index: 9
- name: "root_mid"
type: "STRING"
index: 10
- name: "author_id"
type: "STRING"
index: 12
- name: "source_task"
type: "STRING"
index: 13
- name: "view_count"
type: "INT"
index: 14
- path: /data/y4/tools/data/group-0s.csv
csv:
delimiter: "|"
withHeader: true
tags:
- name: group
mode: INSERT
id:
type: "STRING"
index: 1 # the id column is used as the VID
props:
- name: "name"
type: "STRING"
index: 0
- name: "member_count"
type: "INT"
index: 2
- name: "platform"
type: "STRING"
index: 3
- path: /data/y4/tools/data/organization-0s.csv
csv:
delimiter: "|"
withHeader: true
tags:
- name: organization
mode: INSERT
id:
type: "STRING"
index: 2 # the id column is used as the VID
props:
- name: "name"
type: "STRING"
index: 0
- name: "industry"
type: "STRING"
index: 1
- name: "member_count"
type: "STRING"
index: 3
- name: "platform"
type: "STRING"
index: 4
- path: /data/y4/tools/data/task-0s.csv
csv:
delimiter: "|"
withHeader: true
tags:
- name: task
mode: INSERT
id:
type: "STRING"
index: 2 # the id column is used as the VID
props:
- name: "keywords"
type: "STRING"
index: 0
- name: "name"
type: "STRING"
index: 1
# Edge data
- path: /data/y4/tools/data/user_follow_user-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: user_follow_user
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/user_follower_user-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: user_follower_user
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/user_friend_user-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: user_friend_user
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/user_have_post-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: user_have_post
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/post_repost_post-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: post_repost_post
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/post_cite_post-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: post_cite_post
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
# - path: /data/y4/tools/data/user_post_post-0.csv
# csv:
# delimiter: "|"
# withHeader: true
# edges:
# - name: user_post_post
# mode: INSERT
# src:
# id:
# type: "STRING"
# index: 2 # src
# dst:
# id:
# type: "STRING"
# index: 1 # dst
# props:
# - name: "extra_prop"
# type: "STRING"
# index: 0
- path: /data/y4/tools/data/user_comment_user-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: user_comment_user
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/post_comment_post-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: post_comment_post
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/post_multicomment_post-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: post_multicomment_post
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/user_join_group-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: user_join_group
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/group_include_user-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: group_include_user
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/user_participate_task-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: user_participate_task
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/task_include_user-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: task_include_user
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/post_in_task-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: post_in_task
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/task_include_post-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: task_include_post
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
- path: /data/y4/tools/data/organization_include_user-0.csv
csv:
delimiter: "|"
withHeader: true
edges:
- name: organization_include_user
mode: INSERT
src:
id:
type: "STRING"
index: 2 # src
dst:
id:
type: "STRING"
index: 1 # dst
props:
- name: "extra_prop"
type: "STRING"
index: 0
# - path: /data/y4/tools/data/user_join_organization-0.csv
# csv:
# delimiter: "|"
# withHeader: true
# edges:
# - name: user_join_organization
# mode: INSERT
# src:
# id:
# type: "STRING"
# index: 2 # src
# dst:
# id:
# type: "STRING"
# index: 1 # dst
# props:
# - name: "extra_prop"
# type: "STRING"
# index: 0

View File

@ -91,7 +91,7 @@ public class GeneratorTestData {
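// Create the thread pool used for edge generation.
// NOTE(review): the work queue passed below is an unbounded LinkedBlockingQueue, so
// ThreadPoolExecutor never starts threads beyond corePoolSize and maximumPoolSize has no effect.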
ThreadPoolExecutor edgeExecutor = new ThreadPoolExecutor(
5, // corePoolSize
7, // maximumPoolSize
10, // maximumPoolSize
60, // keepAliveTime
TimeUnit.SECONDS, // unit
new LinkedBlockingQueue<>(), // workQueue: unbounded by default when no initial capacity is given
@ -142,7 +142,7 @@ public class GeneratorTestData {
// Generate vertex data
List<Map<String, String>> list = generateData(tagNumsMap.get(value.getTag()), value.getTag(), ids, count);
// Write vertex data to a CSV file
writeToCsv(list, finalOutputDir + "/" + value.getTag() + "s-" + count + ".csv");
writeToCsv(list, finalOutputDir + "/" + value.getTag() + "-" + count + "s.csv");
});
tagFutures.add(future);
}

View File

@ -17,27 +17,23 @@ public enum EdgeToTagEnums {
// A follows B
EDGE_FOLLOW_(EDGE_FOLLOW, TAG_USER, TAG_USER, "个体关注关系"),
// A->B: B is a follower (fan) of A
EDGE_FANS_(EDGE_FANS, TAG_USER, TAG_USER, "个体粉丝关系"),
EDGE_FANS_(EDGE_FANS, TAG_USER, TAG_USER, "个体粉丝关系"),
EDGE_USER_COMMENT_USER_(EDGE_USER_COMMENT_USER, TAG_USER, TAG_USER, "个体评论个体关系"),
UEDGE_SER_POST_POST(EDGE_USER_POST_POST, TAG_USER, TAG_POST, "个体发帖关系"),
USER_JOIN_GROUP_(USER_JOIN_GROUP, TAG_USER, TAG_GROUP, "个体属于群体关系"),
EDGE_USER_HAVE_POST_(EDGE_USER_HAVE_POST, TAG_USER, TAG_POST, "用户拥有的帖文"),
USER_JOIN_ORGANIZATION_(USER_JOIN_ORGANIZATION, TAG_USER, TAG_ORGANIZATION, "个体属于组织关系"),
EDGE_USER_PARTICIPATE_TASK_(EDGE_USER_PARTICIPATE_TASK, TAG_USER, TAG_TASK, "个体参与事件关系"),
EDGE_POST_COMMENT_POST_(EDGE_POST_COMMENT_POST, TAG_POST, TAG_POST, "帖文评论帖文"),
EDGE_USER_HAVE_POST_(EDGE_USER_HAVE_POST, TAG_USER, TAG_POST, "用户拥有的帖文"),
EDGE_POST_CITE_POST_(EDGE_POST_CITE_POST, TAG_POST, TAG_POST, "帖文引用帖文"),
EDGE_POST_MULTICOMMENT_POST_(EDGE_POST_MULTICOMMENT_POST, TAG_POST, TAG_POST, "帖文多级评论帖文关系"),
POST_REPOST_POST(EDGE_POST_FORWARD_POST, TAG_POST, TAG_POST, "帖文转发帖文"),
EDGE_POST_IN_TASK_(EDGE_POST_IN_TASK, TAG_POST, TAG_TASK, "帖文属于事件关系"),
GROUP_INCLUDE_USER_(GROUP_INCLUDE_USER, TAG_GROUP, TAG_USER, "群体包含个体关系"),
USER_JOIN_GROUP_(USER_JOIN_GROUP, TAG_USER, TAG_GROUP, "个体属于群体关系"),
EDGE_USER_PARTICIPATE_TASK_(EDGE_USER_PARTICIPATE_TASK, TAG_USER, TAG_TASK, "个体参与事件关系"),
TASK_INCLUDE_USER_(TASK_INCLUDE_USER, TAG_TASK, TAG_USER, "事件包含个体关系"),
EDGE_POST_IN_TASK_(EDGE_POST_IN_TASK, TAG_POST, TAG_TASK, "帖文属于事件关系"),
EDGE_TASK_INCLUDE_POST_(EDGE_TASK_INCLUDE_POST, TAG_TASK, TAG_POST, "事件包含帖文关系"),
ORGANIZATION_INCLUDE_USER_(ORGANIZATION_INCLUDE_USER, TAG_ORGANIZATION, TAG_USER, "组织包含个体关系"),
;
ORGANIZATION_INCLUDE_USER_(ORGANIZATION_INCLUDE_USER, TAG_ORGANIZATION, TAG_USER, "组织包含个体关系");
private final String type;
private final String srcTag;

View File

@ -16,40 +16,34 @@ public class NetworkConstants {
public static final String EDGE_FANS = "user_follower_user";
// User comments on user
public static final String EDGE_USER_COMMENT_USER = "user_comment_user";
// User belongs to group
public static final String USER_JOIN_GROUP = "user_join_group";
// Posts related to a user, including original posts, reposts, comments, etc.
public static final String EDGE_USER_HAVE_POST = "user_have_post";
// User participates in an event (task)
public static final String EDGE_USER_PARTICIPATE_TASK = "user_participate_task";
// User belongs to organization
public static final String USER_JOIN_ORGANIZATION = "user_join_organization";
// User publishes a post
public static final String EDGE_USER_POST_POST = "user_post_post";
// Post comments on post
public static final String EDGE_POST_COMMENT_POST = "post_comment_post";
// Posts related to a user, including original posts, reposts, comments, etc.
public static final String EDGE_USER_HAVE_POST = "user_have_post";
// Post cites post
public static final String EDGE_POST_CITE_POST = "post_cite_post";
// Post reposts post
public static final String EDGE_POST_FORWARD_POST = "post_repost_post";
// Multi-level post comments
public static final String EDGE_POST_MULTICOMMENT_POST = "post_multicomment_post";
// Post belongs to an event (task)
public static final String EDGE_POST_IN_TASK = "post_in_task";
// Group contains user
public static final String GROUP_INCLUDE_USER = "group_include_user";
// User belongs to group
public static final String USER_JOIN_GROUP = "user_join_group";
// User participates in an event (task)
public static final String EDGE_USER_PARTICIPATE_TASK = "user_participate_task";
// Event (task) contains user
public static final String TASK_INCLUDE_USER = "task_include_user";
// Post belongs to an event (task)
public static final String EDGE_POST_IN_TASK = "post_in_task";
// Event (task) contains post
public static final String EDGE_TASK_INCLUDE_POST = "task_include_post";
// Organization contains user
public static final String ORGANIZATION_INCLUDE_USER = "organization_include_user";