Skip to content

TEA Plot

TEA Plots

TEA(temp_edgelist, filepath='.', fig_size=(7, 5), font_size=20, network_name=None, time_scale=None, real_dates=None, test_split=False, density=False)

generating TEA plot

Parameters:

Name Type Description Default
temp_edgelist Union[object, dict]

a dictionary of temporal edges or a dataset object.

required
filepath Optional[str]

Path to save the TEA Plot.

'.'
fig_size tuple

Size of the figure to save.

(7, 5)
font_size int

Size of the text in the figure.

20
network_name str

Name of the dataset to be used in the TEA plot file.

None
time_scale Union[str, int]

Time scale used to discretize the data if it has not already been discretized.

None
real_dates bool

Whether to use the real dates from dataset.

None
test_split bool

Whether to show the test split on the plot.

False
density bool

Whether to return the edge density and edge frequency dictionaries.

False
Source code in tgx/viz/TEA.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def TEA(
        temp_edgelist : Union[object, dict], 
        filepath : Optional[str] = ".",
        fig_size : tuple = (7,5),
        font_size : int = 20, 
        network_name : str = None,
        time_scale : Union[str, int] = None, 
        real_dates : bool = None,
        test_split : bool = False,
        density : bool = False
        ):
    r"""
    Generate a TEA plot from temporal edges.

    Parameters:
        temp_edgelist: a dictionary of temporal edges, or a dataset object
            exposing ``freq_data`` and ``count_freq()``.
        filepath: Path to save the TEA Plot.
        fig_size: Size of the figure to save.
        font_size: Size of the text in the figure.
        network_name: Name of the dataset to be used in the TEA plot file.
        time_scale: time scale for discretizing the data if not already done;
            also forwarded to `TEA_plot_edges_bar`.
        real_dates: forwarded unchanged to `TEA_plot_edges_bar`, which reads
            it as a three-item sequence (start, end, metric) when not None.
        test_split: Whether to show the test split on the plot.
        density: Whether to return edge density and edge frequency dictionaries.

    Returns:
        ``(ts_edges_dist_density, edge_frequency_dict)`` when `density` is
        true, otherwise ``None``.
    """
    # Every Python value is an instance of `object`, so the dataset branch
    # must be selected by "is not a dict"; a plain dict has no `freq_data`.
    if not isinstance(temp_edgelist, dict):
        if temp_edgelist.freq_data is None:
            temp_edgelist.count_freq()
        temp_edgelist = temp_edgelist.freq_data

    if time_scale is not None:
        temp_edgelist = discretize_edges(temp_edgelist,
                                        time_scale = time_scale)

    ts_edges_dist, ts_edges_dist_density, edge_frequency_dict = TEA_process_edgelist_per_timestamp(temp_edgelist)

    # time_scale is passed along because the plotting routine uses it as the
    # tick interval when real_dates is supplied.
    TEA_plot_edges_bar(ts_edges_dist, 
                       filepath = filepath, 
                       fig_size = fig_size, 
                       font_size = font_size, 
                       network_name=network_name,
                       real_dates = real_dates,
                       time_scale = time_scale,
                       test_split = test_split)

    if density:
        return ts_edges_dist_density, edge_frequency_dict
    return None

TEA_plot_edges_bar(ts_edges_dist, filepath='.', fig_size=(9, 5), font_size=20, network_name=None, real_dates=None, time_scale=None, test_split=False, show=False)

Make a TEA plot and save it to a PDF file. Args: ts_edges_dist: list of dictionaries containing the edge distribution over time. filepath: Path to save the TEA Plot. fig_size: Size of the figure to save. font_size: Size of the text in the figure. network_name: Name of the dataset to be used in the TEA plot file. real_dates: list of real dates as ticks. time_scale: time scale for discretizing the data if not already done. test_split: Whether to show the test split on the plot. show: Whether to show the plot.

Source code in tgx/viz/TEA.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def TEA_plot_edges_bar(ts_edges_dist: list, 
                   filepath: str = ".", 
                   fig_size: list = (9,5),
                   font_size: int = 20,
                   network_name: str = None,
                   real_dates: list = None,
                   time_scale: list = None,
                   test_split: bool = False,
                   show: bool =False):
    r"""
    Make a stacked bar TEA plot (repeated vs. new edges) and save it as a pdf file.
    Args:
        ts_edges_dist: list of dictionaries containing the edge distribution over time
            (keys 'ts', 'new', 'repeated', 'not_repeated', 'total_curr_ts',
            'total_seen_until_curr_ts').
        filepath: Directory in which to save the TEA Plot; nothing is saved when None.
        fig_size: Size of the figure to save.
        font_size: Size of the axis-label text in the figure.
        network_name: Name of the dataset to be used in the TEA plot file.
        real_dates: three-item sequence (start, end, metric) used to build
            the x positions via `create_ts_list`; when None, positions are
            0..n-1.
        time_scale: interval passed to `create_ts_list` when real_dates is given.
        test_split: Whether to show the test split on the plot.
        show: Whether to show the plot.
    """
    ts_edges_dist_df = pd.DataFrame(ts_edges_dist, columns=['ts', 'new', 'repeated',
                                                            'not_repeated',
                                                            'total_curr_ts',
                                                            'total_seen_until_curr_ts'])

    fig, ax = plt.subplots(figsize=fig_size)
    plt.subplots_adjust(bottom=0.2, left=0.2)
    ticks_font_size = 15
    plt.yticks(fontsize=ticks_font_size)
    plt.xticks(fontsize=ticks_font_size)

    if real_dates is not None:
        start = real_dates[0]
        end = real_dates[1]
        metric = real_dates[2]
        # The returned list must be kept: it provides the x positions used
        # below. NOTE(review): assumes create_ts_list returns one entry per
        # row of ts_edges_dist -- confirm against its implementation.
        timestamps = create_ts_list(start, end, metric=metric, interval=time_scale)
    else:
        timestamps = list(range(len(ts_edges_dist_df)))

    new = ts_edges_dist_df['new'].tolist()
    repeated = ts_edges_dist_df['repeated'].tolist()

    # Stacked bars: repeated edges at the bottom, new edges on top.
    plt.bar(timestamps, repeated, label='Repeated', color='#404040', alpha=0.4)
    plt.bar(timestamps, new, label='New', bottom=repeated, color='#ca0020', alpha=0.8, hatch='//')

    # Mark the test split at 85% of the timeline; skipped for empty data,
    # where there is no position to index.
    if test_split and len(timestamps) > 0:
        split_position = timestamps[int(0.85 * len(timestamps))]
        plt.axvline(x=split_position, color="blue", linestyle="--", linewidth=2)
        plt.text(split_position, 0,
                'x', va='center', ha='center', fontsize=font_size, fontweight='heavy', color='blue')

    plt.margins(x=0)
    plt.xlabel("Timestamp", fontsize=font_size)
    plt.ylabel("Number of edges", fontsize=font_size)
    plt.legend(fontsize = 13)
    if filepath is not None:
        plt.savefig(f"{filepath}/{network_name}_TEA.pdf")
        print("plot saved as " + f"{filepath}/{network_name}_TEA.pdf")
    if show:
        plt.show()