"""Select "social sensor" (anchor) users from a file of information cascades.

The script parses a cascade file, counts how many cascades every user takes
part in, and greedily picks up to a budgeted number of users: at each step the
users covering the most not-yet-covered cascades are candidates, and the one
with the best reward (earliest normalised activation times) is selected.
"""
import argparse
import copy
from collections import defaultdict

import numpy as np


def find_max_indices_numpy(L_dict):
    """Return the keys of ``L_dict`` whose value equals the maximum value.

    Parameters:
    - L_dict: dict of node -> comparable numeric value

    Returns:
    - numpy array with every key that attains the maximum, in the dict's
      iteration order; an empty array when ``L_dict`` is empty.
    """
    if not L_dict:
        # max() of an empty sequence raises; there are simply no candidates.
        return np.array([])
    keys_arr = np.array(list(L_dict.keys()))
    values_arr = np.array(list(L_dict.values()))
    max_val = max(L_dict.values())  # equivalently np.max(values_arr)
    return keys_arr[np.where(values_arr == max_val)[0]]


def R(event_dict):
    """Reward of a user's cascades: the negated sum of the dict's values.

    Parameters:
    - event_dict: dict of event -> numeric value (normalised activation time
      when produced by ``handle_cas``)

    Returns:
    - ``-(v1 + v2 + ...)``, or ``0`` for an empty dict; smaller values
      (earlier activations) therefore yield a larger reward.
    """
    # Explicit accumulation keeps the exact left-to-right float arithmetic;
    # the accumulator is deliberately not named ``sum`` (builtin).
    total = 0
    for value in event_dict.values():
        total -= value
    return total


def select_social_sensors(R, L_dict, user_event_dict, event_user_dict, b):
    """Greedily select up to ``b`` social sensors.

    Each round, the candidates are the not-yet-selected nodes with the highest
    remaining event count. The candidate with the largest reward ``R`` over
    its remaining events is selected, and all of its events are then removed
    from every other node, lowering their counts.

    Parameters:
    - R: reward function, R(events) returns a numeric value for a node's
      remaining ``event -> active_time`` dict
    - L_dict: dict of node -> integer (event num)
    - user_event_dict: user -> event -> active_time
    - event_user_dict: event -> user_list
    - b: budget (numeric), maximum number of sensors to select

    Returns:
    - A: selected set of social sensors (at most ``b`` nodes; fewer when
      ``L_dict`` has fewer nodes)

    Neither ``L_dict`` nor ``user_event_dict`` is modified; the function works
    on private copies.
    """
    print("Select social sensors begin!")
    user_events = copy.deepcopy(user_event_dict)
    # Working copy of the counts, holding only nodes that are still
    # selectable. Dropping a node once it is chosen guarantees every round
    # adds a *new* sensor, so the loop always terminates (previously an
    # already-selected node could be re-picked forever once all counts hit 0).
    remaining = dict(L_dict)
    A = set()
    all_cas = 0

    while remaining and len(A) < b:
        TAR_set = set(find_max_indices_numpy(remaining))

        # Lazy evaluation: every candidate starts with an infinite bound and
        # is accepted only once its bound has been replaced by its real reward.
        delta = {s: float('inf') for s in TAR_set}
        evaluated = {s: False for s in TAR_set}
        while True:
            s_star = max(delta, key=delta.get)
            c_star = user_events[s_star]
            if evaluated[s_star]:
                A.add(s_star)
                break
            delta[s_star] = R(c_star)
            evaluated[s_star] = True

        del remaining[s_star]
        all_cas += len(c_star)

        # Snapshot the keys: c_star may itself be edited while events are
        # removed from the participants.
        for cas_id in list(c_star.keys()):
            for v in event_user_dict[cas_id]:
                # Only decrement when the event was really still attached to
                # the node, so counts stay consistent with the event dicts.
                if v in remaining and user_events[v].pop(cas_id, None) is not None:
                    remaining[v] -= 1

        print(f"Add a social sensor, sensors num is {len(A)}")
        print(f"Anchor id: {s_star}")
        print(f"all_cas is {all_cas}")
        print(f"TAR_set size: {len(TAR_set)}")

    print(f"Select social sensors finish! \nGet social sensors, num: {len(A)}")
    return A


def handle_cas(filename, obs_time=-1):
    """Parse a cascade file into per-user and per-event lookup tables.

    Every line has the form ``cascade_id,path:time,path:time,...`` where
    ``path`` is a ``/``-separated list of node ids and the last id is the node
    activated at integer ``time``.

    Parameters:
    - filename: path of the cascade file
    - obs_time: activations later than this time are ignored; ``-1`` keeps all

    Returns:
    - L_dict: user -> number of cascades recorded for that user
    - user_event_dict: user -> cascade_id -> activation time normalised to
      [0, 1] by the cascade's min/max time (only for cascades whose times are
      not all equal)
    - event_user_dict: cascade_id -> list of observed users
    """
    print("Handle cascade begin!")
    user_event_dict = defaultdict(dict)
    event_user_dict = defaultdict(list)

    with open(filename) as file:
        for line in file:
            line = line.rstrip('\n')
            if not line.strip():
                continue
            cascade_id, *paths = line.split(',')

            activation_times = {}
            # NOTE: the time range spans *all* entries of the line, including
            # those after obs_time, exactly as before.
            t_max = float('-inf')
            t_min = float('inf')
            for p in paths:
                if not p.strip():
                    # Tolerate empty fields such as a trailing comma.
                    continue
                # observed adoption/participant
                fields = p.split(':')
                nodes = fields[0].split('/')
                time_now = int(fields[1])
                t_max = max(t_max, time_now)
                t_min = min(t_min, time_now)
                if obs_time != -1 and time_now > obs_time:
                    continue
                node_id = int(nodes[-1])
                # Keep the earliest activation when a node appears repeatedly.
                if node_id in activation_times:
                    activation_times[node_id] = min(time_now, activation_times[node_id])
                else:
                    activation_times[node_id] = time_now

            for node_id, act_time in activation_times.items():
                event_user_dict[cascade_id].append(node_id)
                # A zero-length time range cannot be normalised; skip it.
                if t_max > t_min:
                    user_event_dict[node_id][cascade_id] = (act_time - t_min) / (t_max - t_min)

    L_dict = {user: len(events) for user, events in user_event_dict.items()}
    print(f"Handle cascade file finish! \nUsers num is {len(L_dict)}")
    return L_dict, user_event_dict, event_user_dict


def generate_anchors(input_file, output_file, anchor_budget, obs_time=-1, max_anchor_ratio=0.02):
    """Select anchors from a cascade file and write them one per line.

    Parameters:
    - input_file: cascade file, parsed by ``handle_cas``
    - output_file: destination file, overwritten
    - anchor_budget: requested number of anchors
    - obs_time: observation time passed to ``handle_cas`` (``-1`` = all)
    - max_anchor_ratio: the budget is capped at this fraction of the number of
      users (default 0.02, the previously hard-coded value)
    """
    L_dict, user_event_dict, event_user_dict = handle_cas(input_file, obs_time)
    max_anchor_num = int(len(L_dict) * max_anchor_ratio)
    if anchor_budget > max_anchor_num:
        print(f"Max anchor num is {max_anchor_num}, anchor_budget is set to {max_anchor_num}")
        anchor_budget = max_anchor_num
    A = select_social_sensors(R, L_dict, user_event_dict, event_user_dict, anchor_budget)
    with open(output_file, 'w') as file:
        file.writelines(f"{item}\n" for item in A)


def parse_args():
    """Parse the command-line options of the anchor-generation script."""
    parser = argparse.ArgumentParser(description='Parameters')
    parser.add_argument('--input_file', default='./dataset_for_anchor.txt', type=str, help='Cascade file')
    parser.add_argument('--output_file', default='./anchors.txt', type=str, help='Anchors save file')
    parser.add_argument('--anchor_budget', default=100, type=int, help='Anchors num')
    parser.add_argument('--obs_time', default=-1, type=int, help='Anchors observe time, default seeting is -1, meaning can observe all')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    generate_anchors(args.input_file, args.output_file, args.anchor_budget, args.obs_time)