-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGraphlaxyDataGen.py
190 lines (147 loc) · 10.5 KB
/
GraphlaxyDataGen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python
import argparse
import sys
class Graphlaxy(object):
    """Git-style command-line dispatcher for the Graphlaxy dataset tools.

    The constructor parses ``argv[1]`` as a sub-command and delegates to the
    matching handler method.  Each handler re-parses its own slice of
    ``sys.argv`` and defers the heavy ``processes.*`` imports until the
    command actually runs, so ``--help`` stays fast.
    """

    def __init__(self):
        parser = argparse.ArgumentParser(
            description="Tool used to create synthetic graph datasets using 'Nash Bargaining Scheme' optimization.",
            usage='''gdg <command> [<args>]

The available commands are:
   optimization   Create a baseline dataset and optimize the parameters.
   generate       Using the fitted parameters generate a synthetic graph dataset.
   plots          Generate plots showing different characteristics of the baseline, sampled, and final datasets.
   statistics     Print some basic statistics of target dataset
''')
        parser.add_argument('command', help='Subcommand to run')
        # Dispatch table: sub-command name -> bound handler method.
        commands = {
            "optimization": self.optimization,
            "generate": self.generate,
            "plots": self.plot,
            "statistics": self.statistics,
        }
        # Parse only argv[1]; each handler consumes the remaining arguments.
        args = parser.parse_args(sys.argv[1:2])
        if args.command not in commands:
            print('Unrecognized command')
            parser.print_help()
            sys.exit(1)  # sys.exit is reliable even when site's exit() is absent
        commands[args.command]()

    def optimization(self):
        """Sub-command: dispatch one step of the parameter-optimization pipeline."""
        parser = argparse.ArgumentParser(
            description="Steps for the optimization.",
            usage='''gdg optimization <subcommand> [<args>]

The available subcommands are:
   baseline   Only creates the baseline dataset
   metrics    Calculate the metrics of a dataset
   optimize   Use sampling and the Powell method with cooperative bargaining to optimize the input RMat parameters

*************************************
To run the full optimization in steps:
First, create the baseline dataset, then take the metrics and finally optimize the parameters.''')
        parser.add_argument('subcommand', help='Subcommand to run')
        commands = {
            "baseline": self.baseline,
            "metrics": self.metrics,
            "optimize": self.optimize,
        }
        # Parse only argv[2]; the step handlers consume argv[3:].
        args = parser.parse_args(sys.argv[2:3])
        if args.subcommand not in commands:
            print('Unrecognized command')
            parser.print_help()
            sys.exit(1)
        commands[args.subcommand]()

    def generate(self):
        """Sub-command: generate the final synthetic dataset from fitted weights."""
        parser = argparse.ArgumentParser(description="Using the fitted parameters generate a synthetic graph dataset.")
        parser.add_argument('-f', "--folder", metavar="str", type=str,
            help="Folder where to generate the dataset.", default="../result_dataset")
        parser.add_argument('-s', "--dataset-size", metavar="int", type=int,
            help="The size of the dataset to generate.", default=5000)
        parser.add_argument('-e', "--edges-between", nargs=2, metavar="int", type=int,
            help="The min and max value the edges argument can take.", default=(100000, 2000000))
        parser.add_argument('-m', '--multiprocess', action="store_true",
            help="Add to take advantage of multiple cores.")
        parser.add_argument('-w', "--custom-weights", nargs=8, metavar="float", type=float,
            help="List of weights for the beta distributions.",
            default=[1.3500523980958758, 0.9756729865636893, 1.4562248430720026, 0.22767153268062393, 1.055699069458428, 0.9060404341929743, 0.35052426603213255, 1.157122011830607])
        parser.add_argument('-F', '--from-file', action="store_true",
            help="Use a stored set of weights. Use with --parameters-file and --name parameters to indicate where to get the weights from. By setting this parameter the parameter --custom-weights gets disabled.")
        parser.add_argument('-p', "--parameters-file", metavar="str", type=str,
            help="File where the parameters are", default="../baseline_dataset/optimized_parameters.csv")
        parser.add_argument('-n', "--name", metavar="str", type=str,
            help="An id for the parameters.", default="result")
        args = parser.parse_args(sys.argv[2:])

        # Imported lazily so other commands don't pay for this dependency.
        from processes.result_dataset import generate_result_dataset
        generate_result_dataset(args.from_file, args.custom_weights, args.parameters_file,
                                args.name, args.folder, args.dataset_size, args.edges_between,
                                args.multiprocess)

    def statistics(self):
        """Sub-command: print basic statistics of a target dataset."""
        parser = argparse.ArgumentParser(description="Calculate some statistics over a dataset.")
        parser.add_argument('-f', "--folder", metavar="str", type=str,
            help="Folder where the dataset to analyze was generated.", default="data/validation_dataset")
        parser.add_argument('-s', "--sample-size", metavar="int", type=int,
            help="The size of the sample.", default=1000)
        args = parser.parse_args(sys.argv[2:])

        from processes.statistics import statistics
        statistics(args.folder, args.sample_size)

    def plot(self):
        """Sub-command: produce analysis plots for baseline/sampled/final datasets."""
        parser = argparse.ArgumentParser(description="Some plots to analyze the results.")
        parser.add_argument('-f', "--folder", metavar="str", type=str,
            help="Folder where the dataset to analyze was generated.", default="../baseline_dataset")
        parser.add_argument('-v', "--validation-metrics", metavar="str", type=str,
            help="File where the validation metrics are.", default="data/validation_dataset/dataset_metrics.csv")
        parser.add_argument('-F', "--format", metavar="str", type=str,
            help="Format to save generated images in.", default="svg")
        parser.add_argument('-o', "--output-folder", metavar="str", type=str,
            help="Folder where to save plots.", default="../plots")
        parser.add_argument('-s', "--sample-size", metavar="int", type=int,
            help="The size of the sample.", default=1000)
        parser.add_argument('-sh', '--show-plots', action="store_true",
            help="Show plots instead of saving them.")
        choices = ["fitness_evolution", "clustering_density", "dlog_density", "density_param",
                   "validation", "param_dlog", "param_clustering", "sample_param",
                   "sample_paramdist", "sample_grid"]
        default = ["sample_param", "sample_paramdist", "sample_grid"]
        parser.add_argument('-p', "--plot-selection", nargs='+', metavar="str", type=str,
            help="Selects the plots to make. Possible values: {}".format(choices), default=default,
            choices=choices)
        parser.add_argument('-w', "--custom-weights", nargs=8, metavar="float", type=float,
            help="List of weights for the beta distributions.",
            default=((1, 1, 1, 1, 1, 1, 1, 1)))
        choices = ["custom", "initial"]
        parser.add_argument('-ws', "--weight-source", metavar="str", type=str,
            help="Where to get the weights used for the plot from. Possible values: {}".format(choices), default="custom",
            choices=choices)
        parser.add_argument('-n', "--name", metavar="str", type=str,
            help="Name of the params to use for the fitness_evolution.", default="result")
        args = parser.parse_args(sys.argv[2:])

        from processes.plot import plot
        plot(args.folder, args.validation_metrics, args.sample_size, args.show_plots,
             args.format, args.output_folder, args.plot_selection, args.custom_weights,
             args.weight_source, args.name)

    def baseline(self):
        """Optimization step: create the baseline dataset."""
        parser = argparse.ArgumentParser(description="Creates the baseline dataset.")
        parser.add_argument('-f', "--folder", metavar="str", type=str,
            help="Folder where to generate the baseline dataset.", default="../baseline_dataset")
        parser.add_argument('-s', "--dataset-size", metavar="int", type=int,
            help="The size of the baseline dataset.", default=10000)
        parser.add_argument('-e', "--edges-between", nargs=2, metavar="int", type=int,
            help="The min and max value the edges argument can take.", default=(100000, 2000000))
        parser.add_argument('-m', '--multiprocess', action="store_true",
            help="Add to take advantage of multiple cores.")
        # argv[3:] because this runs as `gdg optimization baseline ...`.
        args = parser.parse_args(sys.argv[3:])

        from processes.baseline_dataset import generate_baseline
        generate_baseline(args.folder, args.dataset_size, args.edges_between, args.multiprocess)

    def metrics(self):
        """Optimization step: compute per-graph metrics of a dataset."""
        parser = argparse.ArgumentParser(description="Calculate metrics of each graph in a dataset.")
        parser.add_argument('-f', "--folder", metavar="str", type=str,
            help="Folder where the dataset is.", default="../baseline_dataset")
        parser.add_argument('-t', "--clustering-trials", metavar="int", type=int,
            help="Number of trials used to approximate the clustering coefficient.", default=1000)
        parser.add_argument('-m', '--multiprocess', action="store_true",
            help="Add to take advantage of multiple cores.")
        args = parser.parse_args(sys.argv[3:])

        from processes.metrics import calculate_metrics
        calculate_metrics(args.folder, args.clustering_trials, args.multiprocess)

    def optimize(self):
        """Optimization step: fit the RMat input parameters."""
        # Description fixed: it was a copy-paste of the `metrics` description.
        parser = argparse.ArgumentParser(description="Optimize the input RMat parameters using sampling and cooperative bargaining.")
        parser.add_argument('-n', "--name", metavar="str", type=str,
            help="An id for the result.", default="result")
        parser.add_argument('-f', "--folder", metavar="str", type=str,
            help="Folder where the dataset is.", default="../baseline_dataset")
        parser.add_argument('-g', "--grid-size", metavar="int", type=int,
            help="The number of rows and columns the grid has.", default=15)
        parser.add_argument('-w', "--custom-weights", nargs=8, metavar="float", type=float,
            help="Initial weights for optimization.",
            default=[1.3500523980958758, 0.9756729865636893, 1.4562248430720026, 0.22767153268062393, 1.055699069458428, 0.9060404341929743, 0.35052426603213255, 1.157122011830607])
        args = parser.parse_args(sys.argv[3:])

        from processes.optimization import optimize
        optimize(args.name, args.folder, args.grid_size, args.custom_weights)
def _main() -> None:
    """CLI entry point: the Graphlaxy constructor parses argv and dispatches."""
    Graphlaxy()


if __name__ == "__main__":
    _main()