# mdbs/anchor.py

from collections import defaultdict
import numpy as np
import copy
import argparse

def find_max_indices_numpy(L_dict):
    """
    Return the keys of ``L_dict`` whose value equals the maximum value.

    Parameters:
    - L_dict: dict of key -> comparable numeric value

    Returns:
    - numpy array with every key holding the maximum value, in the
      iteration order of ``L_dict``; an empty array when ``L_dict`` is empty
    """
    keys_arr = np.array(list(L_dict.keys()))
    if not L_dict:
        # max() of an empty sequence raises ValueError; no key can qualify.
        return keys_arr

    values_arr = np.array(list(L_dict.values()))
    max_val_from_dict = max(L_dict.values())

    # A boolean mask keeps every key tied for the maximum.
    return keys_arr[values_arr == max_val_from_dict]

def R(event_dict):
    """
    Reward of a set of events: the negated sum of their values.

    Parameters:
    - event_dict: dict of event -> numeric value (only the values are used)

    Returns:
    - 0 for an empty dict, otherwise minus the sum of all values
    """
    # Named ``total`` so the builtin ``sum`` is not shadowed.
    total = 0
    for value in event_dict.values():
        total -= value
    return total


def select_social_sensors(R, L_dict, user_event_dict, event_user_dict, b):
    """
    Greedily select up to ``b`` social sensors.

    Every round looks at the not-yet-selected nodes that still have the
    largest number of remaining events, scores each of them with ``R`` on
    its remaining events and selects the best one (ties go to the node that
    comes first in ``L_dict``). The events of the selected node are then
    removed from every user that took part in them.

    Parameters:
    - R: reward function; called with a node's event -> active_time dict,
      returns a numeric value
    - L_dict:  dict of node -> integer (event num)
    - user_event_dict: user -> event -> active_time
    - event_user_dict: event -> user_list
    - b: budget (numeric), the maximum number of sensors to select

    Returns:
    - A: selected set of social sensors

    ``L_dict`` and ``user_event_dict`` are copied before being updated, so
    the caller's objects keep their original content.
    """
    print("Select social sensors begin!")
    user_event_dict = copy.deepcopy(user_event_dict)
    remaining_counts = dict(L_dict)

    A = set()
    all_cas = 0
    # A only ever receives keys of remaining_counts, so comparing sizes tells
    # whether an unselected node is left.
    while len(A) < len(remaining_counts) and len(A) < b:
        # Already selected nodes must never be candidates again: their count
        # is 0, and once every count is 0 they could win the tie forever.
        open_counts = {
            node: count
            for node, count in remaining_counts.items()
            if node not in A
        }
        top_count = max(open_counts.values())
        TAR_set = [node for node, count in open_counts.items() if count == top_count]

        # max() returns the first best candidate, which keeps ties deterministic.
        s_star = max(TAR_set, key=lambda node: R(user_event_dict[node]))
        A.add(s_star)

        c_star = user_event_dict[s_star]
        # Snapshot the event ids: c_star itself shrinks in the loop below.
        covered = list(c_star)
        all_cas += len(covered)
        for cas_id in covered:
            for v in event_user_dict[cas_id]:
                events_of_v = user_event_dict.get(v)
                if events_of_v is None or cas_id not in events_of_v:
                    # Unknown user, or a user listed twice for this event.
                    continue
                del events_of_v[cas_id]
                if v in remaining_counts:
                    remaining_counts[v] -= 1
        print(f"Add a social sensor, sensors num is {len(A)}")
        print(f"Anchor id: {s_star}")
        print(f"all_cas is {all_cas}")
        print(f"TAR_set size: {len(TAR_set)}")

    print(f"Select social sensors finish! Get social sensors, num: {len(A)}")
    return A

def handle_cas(filename, obs_time=-1):
    """
    Parse a cascade file into per-user and per-event lookup tables.

    Each line is read as ``cascade_id,path:time,path:time,...`` where a
    path is a '/'-separated list of node ids whose last entry is the
    participating node, and ``time`` is an integer timestamp. Blank lines
    and empty fields (e.g. after a trailing comma) are ignored.

    Parameters:
    - filename: path of the cascade file
    - obs_time: participations with a time greater than this value are
      dropped; -1 disables the filter

    Returns:
    - L_dict: user -> number of events recorded for that user
    - user_event_dict: user -> event -> activation time, rescaled to [0, 1]
      over the time span of the event
    - event_user_dict: event -> list of participating users
    """
    print("Handle cascade begin!")
    user_event_dict = defaultdict(dict)
    event_user_dict = defaultdict(list)
    with open(filename) as file:
        for line in file:
            parts = line.rstrip().split(',')
            cascade_id = parts[0]
            activation_times = {}

            # -inf / +inf seeds give the right span for any sign of timestamp.
            t_max = float('-inf')
            t_min = float('inf')
            for p in parts[1:]:
                if not p.strip():
                    continue
                fields = p.split(':')
                time_now = int(fields[1])
                # NOTE: the span is taken over every timestamp of the line,
                # including the ones filtered out by obs_time below.
                t_max = max(t_max, time_now)
                t_min = min(t_min, time_now)
                # The last node of the path is the observed participant.
                node_id = int(fields[0].split('/')[-1])

                if obs_time != -1 and time_now > obs_time:
                    continue

                # Keep the earliest activation when a node shows up repeatedly.
                earlier = activation_times.get(node_id, time_now)
                activation_times[node_id] = min(time_now, earlier)

            for node_id, stamp in activation_times.items():
                event_user_dict[cascade_id].append(node_id)
                # A zero-length span cannot be normalised, so such events are
                # only recorded in event_user_dict.
                if t_max > t_min:
                    user_event_dict[node_id][cascade_id] = (stamp - t_min) / (t_max - t_min)

    L_dict = {user: len(events) for user, events in user_event_dict.items()}

    print(f"Handle cascade file finish! Users num is {len(L_dict)}")

    return L_dict, user_event_dict, event_user_dict

def generate_anchors(input_file, output_file, anchor_budget, obs_time = -1, max_anchor_ratio=0.02):
    """
    Select anchors from a cascade file and write them to ``output_file``.

    Parameters:
    - input_file: cascade file, parsed with handle_cas
    - output_file: file that receives one anchor id per line
    - anchor_budget: requested number of anchors
    - obs_time: observation time passed on to handle_cas (-1: observe all)
    - max_anchor_ratio: upper bound on the number of anchors, as a fraction
      of the number of users found in the cascade file (default 2%)
    """
    L_dict, user_event_dict, event_user_dict = handle_cas(input_file, obs_time)

    num_nodes = len(L_dict)
    max_anchor_num = int(num_nodes * max_anchor_ratio)

    # Never ask for more anchors than the cap allows.
    if anchor_budget > max_anchor_num:
        print(f"Max anchor num is {max_anchor_num}, anchor_budget is set to {max_anchor_num}")
        anchor_budget = max_anchor_num

    A = select_social_sensors(R, L_dict, user_event_dict, event_user_dict, anchor_budget)

    with open(output_file, 'w') as file:
        file.writelines(f"{item}\n" for item in A)

def parse_args():
    """
    Build the command-line parser and parse ``sys.argv``.

    Returns:
    - argparse namespace with input_file, output_file, anchor_budget and
      obs_time
    """
    # (flag, default, type, help) for every supported option.
    option_table = (
        ('--input_file', './dataset_for_anchor.txt', str, 'Cascade file'),
        ('--output_file', './anchors.txt', str, 'Anchors save file'),
        ('--anchor_budget', 100, int, 'Anchors num'),
        ('--obs_time', -1, int,
         'Anchors observe time, default seeting is -1, meaning can observe all'),
    )

    cli = argparse.ArgumentParser(description='Parameters')
    for flag, fallback, caster, description in option_table:
        cli.add_argument(flag, default=fallback, type=caster, help=description)
    return cli.parse_args()

# Script entry point: read the command-line options and generate the anchors.
if __name__ == '__main__':
    cli_options = parse_args()
    generate_anchors(
        input_file=cli_options.input_file,
        output_file=cli_options.output_file,
        anchor_budget=cli_options.anchor_budget,
        obs_time=cli_options.obs_time,
    )