# Litmus/litmus/litmus_mixing.py

import os
import re
import sys
import time
import json
import pkgutil
import argparse
import collections
from tqdm import tqdm
import pickle
import copy
from pprint import pprint
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams["figure.facecolor"] = "white"
import numpy as np
import pandas as pd
import xgboost as xgb
import sklearn.linear_model
import sklearn.model_selection
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.neural_network
import pdb
# Stopgap to enable use as module & standalone script
if __name__ == "__main__":
from config import (
SUPPORTED_MODELS,
SUPPORTED_MODES,
SUPPORTED_DATA_FORMATS,
MODEL2LANGS,
LANG2INDEX,
)
PRECOMPUTED_FEATURE_FILE = "data/precomputed_features.json"
else:
from litmus.config import (
SUPPORTED_MODELS,
SUPPORTED_MODES,
SUPPORTED_DATA_FORMATS,
MODEL2LANGS,
LANG2INDEX,
)
PRECOMPUTED_FEATURE_FILE = ""
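# Featurizer assembles the regression feature matrix for (target language,
# pivot configuration) pairs from the precomputed type-overlap and
# syntactic-distance tables plus per-language regression features.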
class Featurizer:
def __init__(self, model, precomputed_feature_file, pivot_features):
self.langs_list = MODEL2LANGS[model]
self.pivot_features = pivot_features
if precomputed_feature_file != "":
with open(precomputed_feature_file) as fin:
precomputed = json.load(fin)
else:
pc_data = pkgutil.get_data(__name__, "data/precomputed_features.min.json")
precomputed = json.loads(pc_data.decode("utf-8"))
self.precomputed_type_overlaps = precomputed["type_overlap"][model]
self.precomputed_syntactic_distance = precomputed["syntactic_distance"][model]
self.regression_feats = pd.DataFrame(
precomputed["regression_feats"][model], index=self.langs_list
)
def featurize(self, langs_target, langs_pivot, task_sizes):
syntactic_distance = [
[
self.precomputed_syntactic_distance[self.langs_list.index(pivot)][
self.langs_list.index(lang)
]
for lang in langs_target
]
for pivot in langs_pivot
]
type_overlaps = [
[self.precomputed_type_overlaps[pivot][lang] for lang in langs_target]
for pivot in langs_pivot
]
pivot_fts = [
self.regression_feats.loc[langs_target].values,
]
if self.pivot_features == "data_only":
for i in range(len(langs_pivot)):
if task_sizes[i] == 0:
syntactic_distance[i] = [0 for _ in range(len(langs_target))]
type_overlaps[i] = [0 for _ in range(len(langs_target))]
if self.pivot_features != "none":
pivot_fts.append(np.array(type_overlaps).transpose())
pivot_fts.append(np.array(syntactic_distance).transpose())
task_size = [task_sizes for i in range(len(langs_target))]
pivot_fts.append(np.array(task_size).transpose(0, 1))
return np.concatenate(pivot_fts, axis=1)
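# Illustrative sketch (not from the original code; model name "xlmr" and the
# language codes "en"/"hi" are assumptions). Rows of the returned matrix
# correspond to target languages; columns are the per-target regression
# features, then (unless pivot_features == "none") type-overlap and
# syntactic-distance columns, then the pivot data sizes.
#
#   featurizer = Featurizer("xlmr", "", pivot_features="all")
#   pivots = featurizer.langs_list
#   sizes = [10000 if lang == "en" else 0 for lang in pivots]
#   X = featurizer.featurize(["hi"], pivots, sizes)   # shape: (1, n_features)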
def pretty_print(list_to_print, sep=" "):
list_to_print = [
[
str(x) if not isinstance(x, (np.floating, float)) else "{0:0.4f}".format(x)
for x in t
]
for t in list_to_print
]
n_cols = len(list_to_print[0])
max_l = [max(len(t[i]) for t in list_to_print) for i in range(n_cols)]
format_string = ["{:<" + str(m) + "}" for m in max_l[:-1]]
format_string.append("{}")
format_string = sep.join(format_string)
for i in range(len(list_to_print)):
print(format_string.format(*list_to_print[i]))
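# CustomMinMaxScaler is a MinMaxScaler that (a) ignores repeated fit() calls once
# it has been fitted and (b) forces each feature group listed in common_feats to
# share a single min/max, so pivot-dependent columns are scaled on a common range.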
class CustomMinMaxScaler(sklearn.preprocessing.MinMaxScaler):
@staticmethod
def _handle_zeros_in_scale(scale, copy=True):
# if we are fitting on 1D arrays, scale might be a scalar
if np.isscalar(scale):
if scale == 0.0:
scale = 1.0
return scale
elif isinstance(scale, np.ndarray):
if copy:
# New array to avoid side-effects
scale = scale.copy()
scale[scale == 0.0] = 1.0
return scale
def __init__(self, common_feats=None, **kwargs):
super().__init__(**kwargs)
self.common_feats = common_feats
def fit(self, X, y=None):
first_pass = not hasattr(self, "n_samples_seen_")
if not first_pass:
return self
else:
return super().fit(X, y)
def partial_fit(self, X, y=None):
super().partial_fit(X, y)
if self.common_feats:
for group in self.common_feats:
group = np.array(group)
self.data_min_[group] = np.min(self.data_min_[group])
self.data_max_[group] = np.max(self.data_max_[group])
self.data_range_[group] = self.data_max_[group] - self.data_min_[group]
self.scale_[group] = (
self.feature_range[1] - self.feature_range[0]
) / CustomMinMaxScaler._handle_zeros_in_scale(self.data_range_[group])
self.min_[group] = (
self.feature_range[0] - self.data_min_[group] * self.scale_[group]
)
return self
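# Minimal usage sketch for CustomMinMaxScaler (illustrative values only):
# columns 1 and 2 form one common_feats group and therefore share min=0, max=50.
#
#   import numpy as np
#   scaler = CustomMinMaxScaler(common_feats=[[1, 2]], clip=True)
#   X = np.array([[1.0, 0.0, 50.0], [3.0, 10.0, 20.0]])
#   scaler.fit(X)
#   scaler.transform(X)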
def regression(X, Y, common_feats, training_algorithm, load_model, model):
fit_kwargs = {}
if model or load_model:
if load_model:
with open(load_model, "rb") as f:
model = pickle.load(f)
elif model:
model = copy.deepcopy(model)
if training_algorithm == "mlp":
model.named_steps["regressor"].warm_start = True
elif training_algorithm == "xgboost":
fit_kwargs["regressor__xgb_model"] = model.named_steps[
"regressor"
].get_booster()
else:
if training_algorithm == "mlp":
model = sklearn.pipeline.Pipeline(
[
(
"scaler",
CustomMinMaxScaler(common_feats=common_feats, clip=True),
),
("regressor", sklearn.neural_network.MLPRegressor((50, 50))),
]
)
elif training_algorithm == "xgboost":
model = sklearn.pipeline.Pipeline(
[
(
"scaler",
CustomMinMaxScaler(common_feats=common_feats, clip=True),
),
(
"regressor",
xgb.XGBRegressor(
objective="reg:squarederror",
learning_rate=0.1,
n_estimators=100,
max_depth=10,
),
),
]
)
if X.shape[0] > 0:
model.fit(X, Y, **fit_kwargs)
return model
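# prepare_data builds the training matrices: X stacks featurize() outputs for
# every (train_config, target) pair in the scores file (json or csv format) and
# Y holds the corresponding observed scores.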
def prepare_data(args):
model = args.model_name
langs_list = MODEL2LANGS[model]
featurizer = Featurizer(model, args.precomputed_features, args.pivot_features)
if args.use_all_langs:
all_langs = set(MODEL2LANGS["mbert"]) | set(MODEL2LANGS["xlmr"])
else:
all_langs = set(langs_list)
X_array, Y_array = [], []
examples = []
if args.train_format == "json":
with open(args.scores_file) as f:
scores = json.load(f)
for entry in scores:
train_langs = entry["train_config"].keys()
entry["train_config"].update({k: 0 for k in all_langs - train_langs})
train_config = collections.OrderedDict(
sorted(entry["train_config"].items())
)
eval_results = collections.OrderedDict(
sorted(entry["eval_results"].items())
)
pivots_to_delete = [x for x in train_config if x not in langs_list]
targets_to_delete = [x for x in eval_results if x not in langs_list]
[train_config.pop(key) for key in pivots_to_delete]
[eval_results.pop(key) for key in targets_to_delete]
X_array.append(
featurizer.featurize(
list(eval_results.keys()),
list(train_config.keys()),
list(train_config.values()),
)
)
Y_array.append(list(eval_results.values()))
examples.extend([[train_config, t, l] for l, t in eval_results.items()])
langs_pivot = list(train_config.keys())
langs_target = list(eval_results.keys())
elif args.train_format == "csv":
df = pd.read_csv(args.scores_file)
langs_target = set()
for i, row in df.iterrows():
if row["target_lang"] not in langs_list:
continue
train_langs = row["train_langs"].split(",")
data_sizes = [float(x) for x in row["train_data_sizes"].split(",")]
values = {k: v for k, v in zip(train_langs, data_sizes)}
values.update({k: 0 for k in all_langs - set(train_langs)})
train_config = collections.OrderedDict(sorted(values.items()))
pivots_to_delete = [x for x in train_config if x not in langs_list]
[train_config.pop(key) for key in pivots_to_delete]
langs_target.add(row["target_lang"])
X_array.append(
featurizer.featurize(
[row["target_lang"]],
list(train_config.keys()),
list(train_config.values()),
)
)
Y_array.append(row["score"])
langs_target = list(langs_target)
langs_pivot = list(train_config.keys())
# Reshape datasets
X = np.concatenate(X_array)
Y = np.array(Y_array).reshape(-1)
# Establish feature-set
feat_names = ["Data Size", "Well rep features"]
common_feats = []
if args.pivot_features != "none":
feat_names += ["Type Overlap {}".format(lang) for lang in langs_pivot] + [
"Syntactic Distance {}".format(lang) for lang in langs_pivot
]
if args.common_scaling:
start = 2
mid = start + len(langs_pivot)
end = mid + len(langs_pivot)
common_feats.append([_ for _ in range(start, mid)])
common_feats.append([_ for _ in range(mid, end)])
feat_names += ["Task data size {}".format(lang) for lang in langs_pivot]
if args.common_scaling:
start = 2 + (2 * len(langs_pivot) if args.pivot_features != "none" else 0)
end = start + len(langs_pivot)
common_feats.append([_ for _ in range(start, end)])
return (
model,
langs_list,
feat_names,
featurizer,
common_feats,
X,
Y,
langs_pivot,
langs_target,
examples,
)
"""
Parse user-specified pivot-size configurations
"""
def prepare_inf_data(args, langs_list):
data_sizes = args.data_sizes
if isinstance(data_sizes, str) and data_sizes != "":
sizes = re.sub(r"\s+", "", data_sizes)
data_sizes = []
for row in sizes.split(";"):
sizes_map = {
lang: int(size)
for el in row.split(",")
for lang, size in [el.split(":")]
}
data_sizes += [[sizes_map.get(lang, 0) for lang in langs_list]]
data_sizes = np.array(data_sizes)
tgt_langs = (
args.heatmap_targets if args.heatmap_targets else args.suggestions_targets
)
tgt_langs = re.sub(r"\s+", "", tgt_langs).split(",")
# pdb.set_trace()
return data_sizes, tgt_langs
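# Illustrative example (sizes are hypothetical): with
#   --data_sizes "en:10000,hi:500;en:20000"
# prepare_inf_data returns a (2, len(langs_list)) array, one row per
# semicolon-separated configuration, with 0 for every language not mentioned
# in that configuration.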
def train_model(
X, Y, feat_names, common_feats, langs_pivot, langs_target, examples, args
):
error_method = args.error_method
bprint = args.print
train_indices = args.train_indices
test_indices = args.test_indices
if args.model and args.load_model:
raise ValueError("Cannot specify both model and load_model; specify only one")
model = regression(
X, Y, common_feats, args.training_algorithm, args.load_model, args.model
)
avg_error, std_error = None, None
num_targets = len(langs_target)
num_pivots = len(langs_pivot)
train_function = regression
examples_indices = None
predictions = None
if error_method == "split":
# Split examples into train and test sets and compute error on the test set alone
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
X, Y, random_state=42
)
res1 = train_function(
X_train,
Y_train,
common_feats,
args.training_algorithm,
args.load_model,
args.model,
)
Y_pred = res1.predict(X_test)
errors = abs(Y_test - Y_pred)
baseline_errors = abs(np.mean(Y_train) - Y_test)
elif error_method == "kfold":
# Use 5-fold CV to compute error over all examples
kf = sklearn.model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
errors = []
predictions = []
examples_indices = []
baseline_errors = []
for train_index, test_index in kf.split(X):
X_train, X_test = X[train_index], X[test_index]
Y_train, Y_test = Y[train_index], Y[test_index]
res1 = train_function(
X_train,
Y_train,
common_feats,
args.training_algorithm,
args.load_model,
args.model,
)
Y_pred = res1.predict(X_test)
error = abs((Y_pred - Y_test))
errors.extend(error)
examples_indices.extend(test_index)
predictions.extend(Y_pred)
baseline_errors.extend(abs(np.mean(Y_train) - Y_test))
elif error_method == "manual_split":
# Use train and test splits supplied in args
X_train, Y_train = X[train_indices], Y[train_indices]
X_test, Y_test = X[test_indices], Y[test_indices]
res1 = train_function(
X_train,
Y_train,
common_feats,
args.training_algorithm,
args.load_model,
args.model,
)
Y_pred = res1.predict(X_test)
errors = abs(Y_test - Y_pred)
baseline_errors = abs(np.mean(Y_train) - Y_test)
elif error_method == "LOTO":
# Leave-one-target-out: predict for the held-out target's column of examples and compute errors
errors = []
baseline_errors = []
for t in tqdm(range(num_targets)):
indices_to_delete = [_ * num_targets + t for _ in range(num_pivots)]
X_reg = np.delete(X, indices_to_delete, axis=0)
Y_reg = np.delete(Y, indices_to_delete, axis=0)
Y_gold = Y[indices_to_delete]
res1 = regression(
X_reg,
Y_reg,
common_feats,
args.training_algorithm,
args.load_model,
args.model,
)
Y_pred = res1.predict(X[indices_to_delete])
error = abs((Y_gold - Y_pred))
errors.extend(error)
baseline_errors.extend(abs(np.mean(Y_reg) - Y_gold))
elif error_method == "LOO":
# Leave-one-out: predict each held-out example and compute errors
errors = []
baseline_errors = []
for _ in tqdm(range(X.shape[0])):
X_reg = np.delete(X, _, axis=0)
Y_reg = np.delete(Y, _, axis=0)
Y_gold = Y[_]
res1 = regression(
X_reg,
Y_reg,
common_feats,
args.training_algorithm,
args.load_model,
args.model,
)
Y_pred = res1.predict(X[_].reshape(1, -1))[0]
error = abs((Y_gold - Y_pred))
errors.append(error)
baseline_errors.append(abs(np.mean(Y_reg) - Y_gold))
elif error_method == "LOCO":
# Leave one configuration out scenario
errors = []
predictions = []
examples_indices = []
baseline_errors = []
num_tgts = len(langs_target)
for i in tqdm(range(0, len(examples), num_tgts)):
test_index = list(range(i, i + num_tgts))
train_index = list(range(i)) + list(range(i + num_tgts, len(examples)))
X_train, X_test = X[train_index], X[test_index]
Y_train, Y_test = Y[train_index], Y[test_index]
res1 = train_function(
X_train,
Y_train,
common_feats,
args.training_algorithm,
args.load_model,
args.model,
)
Y_pred = res1.predict(X_test)
error = abs((Y_pred - Y_test))
errors.extend(error)
examples_indices.extend(test_index)
predictions.extend(Y_pred)
baseline_errors.extend(abs(np.mean(Y_train) - Y_test))
elif error_method == "LOLO":
# Leave-one-language-out: remove every instance of a language (in train_config as well as in the results) from training and move its target rows to the test set.
errors = []
predictions = []
examples_indices = []
baseline_errors = []
for lang in tqdm(langs_target):
train_index, test_index = [], []
for i, example in enumerate(examples):
# If the language is present neither as a pivot nor as a target
if lang != example[-1] and example[0][lang] == 0:
train_index.append(i)
# If the language is present as a target, put it in the test set
elif lang == example[-1]:
test_index.append(i)
X_train, X_test = X[train_index], X[test_index]
Y_train, Y_test = Y[train_index], Y[test_index]
res1 = train_function(
X_train,
Y_train,
common_feats,
args.training_algorithm,
args.load_model,
args.model,
)
Y_pred = res1.predict(X_test)
error = abs((Y_pred - Y_test))
errors.extend(error)
examples_indices.extend(test_index)
predictions.extend(Y_pred)
baseline_errors.extend(abs(np.mean(Y_train) - Y_test))
avg_error, std_error = np.mean(errors), np.std(errors)
baseline_error, baseline_std_error = np.mean(baseline_errors), np.std(
baseline_errors
)
if bprint:
print("Avg Pred Error: {0:0.6f}".format(avg_error))
print("Std Pred Error: {0:0.6f}".format(std_error))
print("Baseline Error: {0:0.6f}".format(baseline_error))
print("Std Baseline Error: {0:0.6f}".format(baseline_std_error))
if args.save_model:
with open(args.save_model, "wb") as f:
pickle.dump(model, f)
return (
model,
(avg_error, std_error),
(baseline_error, baseline_std_error),
errors,
examples_indices,
predictions,
)
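# build_acc_matrix predicts one row per pivot-size configuration and one column
# per target language, i.e. a matrix of shape (len(data_sizes), len(langs_target)).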
def build_acc_matrix(langs_pivot, langs_target, featurizer, model, data_sizes):
X = np.concatenate(
[
featurizer.featurize(langs_target, langs_pivot, data_sizes[_])
for _ in range(len(data_sizes))
]
)
Y = model.predict(X)
Y = Y.reshape(len(data_sizes), len(langs_target))
return Y
"""
Enforcing language-sparsity constraint on search space
"""
def find_suggestions(
args,
model,
featurizer,
langs_list,
budget,
lang_budgets,
targets,
pivots,
augmentable,
weights,
data_sizes,
is_exp_grid,
objective,
min_perf,
min_lang_perf,
):
# Helpers
rmSpaces = lambda s: re.sub(r"\s+", "", s)
parseLangData = lambda s, t: {
kv.split(":")[0]: t(kv.split(":")[1])
for kv in rmSpaces(s).split(",")
if kv != ""
}
printInfo = lambda s: print(s) if args.suggestions_verbose else None
# Parse configuration
targets, augmentable = rmSpaces(targets), rmSpaces(augmentable)
targets = set(targets.split(","))
augmentable = set(augmentable.split(",")) if augmentable != "" else targets
weights = parseLangData(weights, int)
lang_budgets = parseLangData(lang_budgets, int)
min_perf = float(min_perf) if min_perf.strip() != "" else 0
min_lang_perf = parseLangData(min_lang_perf, float)
assert len(targets) > 0 and len(pivots) > 0 and len(augmentable) > 0
langs_list = sorted(langs_list)
langs_pvts = [lang for lang in langs_list]
langs_tgts = [lang for lang in langs_list if lang in targets]
langs_wts = [weights.get(lang, 1) for lang in langs_tgts]
orig_sizes = tuple(pivots)
# Helpers
TgtPerfs = lambda sizes: model.predict(
featurizer.featurize(langs_tgts, langs_pvts, sizes)
)
if objective == "avg":
AvgPerf = lambda sizes: np.average(TgtPerfs(sizes), weights=langs_wts)
else:
AvgPerf = lambda sizes: np.amin(TgtPerfs(sizes))
SimpleAugSet = lambda sizes: [
(lang, sizes[idx]) for idx, lang in enumerate(langs_pvts) if sizes[idx] > 0
]
baseline_perf = AvgPerf(np.array(orig_sizes))
grid_linear_step = [
0.1 * lang_budgets.get(lang, budget) for idx, lang in enumerate(langs_pvts)
]
class SearchNode:
ctr_eval = 0
def __init__(self, sizes):
self.sizes = sizes
self.is_terminal = sum(sizes) <= budget
self.score = None
self.perf = None
self.tgt_perfs = None
def Eval(self):
if self.score == None:
SearchNode.ctr_eval += 1
self.score = self.Perf()
return self.score
def Perf(self):
if self.perf == None:
self.tgt_perfs = TgtPerfs(np.array(orig_sizes) + np.array(self.sizes))
self.perf = AvgPerf(np.array(orig_sizes) + np.array(self.sizes))
return self.perf
def Augment(self, lidx):
if self.sizes[lidx] == 0:
return None
if is_exp_grid:
new_sizes = tuple(
[
el // 2 if idx == lidx else el
for idx, el in enumerate(self.sizes)
]
)
else:
new_sizes = tuple(
[
el - grid_linear_step[idx] if idx == lidx else el
for idx, el in enumerate(self.sizes)
]
)
return SearchNode(new_sizes)
def Expand(self):
if self.is_terminal:
return [self]
frontier = [
augNode
for idx, lang in enumerate(langs_pvts)
if lang in augmentable
for augNode in [self.Augment(idx)]
if augNode != None
]
if not len(frontier):
self.is_terminal = True
return [self]
else:
return frontier
def __hash__(self):
return hash((self.sizes, self.is_terminal))
def __eq__(self, other):
return (
other != None
and self.sizes == other.sizes
and self.is_terminal == other.is_terminal
)
def Print(self):
print(tuple(SimpleAugSet(self.sizes)), self.score, self.is_terminal)
# Search params
BEAM_WIDTH = 5
PRUNE_LANG_QLTY_THRESHOLD = 0.02
t0 = time.time()
# Actual search
# Use a list to represent the beam; sorting it is cheap because the beam width is small
start_size = budget // 2 if is_exp_grid else budget
theoretic_max_case = tuple(
[
lang_budgets.get(lang, start_size) if lang in augmentable else 0
for idx, lang in enumerate(langs_pvts)
]
)
beam = [SearchNode(theoretic_max_case)]
while any(not node.is_terminal for node in beam):
beam = [f for node in beam for f in node.Expand()]
beam = list(set(beam))
# batch mode scoring
inps = np.concatenate(
[
featurizer.featurize(
langs_tgts, langs_pvts, np.array(orig_sizes) + np.array(node.sizes)
)
for node in beam
]
)
outs = model.predict(inps).reshape(len(beam), len(langs_tgts))
if objective == "avg":
scores = np.average(outs, axis=1, weights=langs_wts)
else:
scores = np.amin(outs, axis=1)
for idx, score in enumerate(list(scores)):
beam[idx].score = score
beam[idx].tgt_perfs = outs[idx, :]
SearchNode.ctr_eval += len(beam)
# Apply constraints on beam candidates
beam = [
node
for node in beam
if node.Eval() >= min_perf
and all(
lang_perf >= min_lang_perf[lang]
for lang, lang_perf in zip(langs_tgts, node.tgt_perfs)
if lang in min_lang_perf
)
]
# Retain top-K candidates
beam = sorted(beam, key=lambda x: x.Eval(), reverse=True)
beam = beam[:BEAM_WIDTH]
if args.suggestions_verbose:
print("Beam:")
[node.Print() for node in beam]
if len(beam) == 0:
best = None
else:
# Cleanup best solution:
# Iteratively shrink the smallest augmentation languages as long as the drop in gains stays below PRUNE_LANG_QLTY_THRESHOLD (2%)
best = beam[0]
best_gains = best.Perf() - baseline_perf
sorted_aug_langs = sorted(
[(langs_pvts[idx], size, idx) for idx, size in enumerate(best.sizes)],
key=lambda x: x[1],
)
for aug_lang, aug_size, aug_idx in sorted_aug_langs:
while best.sizes[aug_idx] > 0:
curr = best.Augment(aug_idx)
curr_gains = curr.Perf() - baseline_perf
printInfo(
"Attempting reduction of lang (%s, %d, %d) with gains delta (%.4f - %.4f)..."
% (aug_lang, curr.sizes[aug_idx], aug_idx, curr_gains, best_gains)
)
if (
curr_gains > best_gains
or 1.0 - curr_gains / best_gains < PRUNE_LANG_QLTY_THRESHOLD
):
printInfo("Reduced lang %s..." % aug_lang)
best = curr
else:
break
t1 = time.time()
equal_aug_sizes = np.array(orig_sizes) + np.array(
[
(budget // len(augmentable)) if lang in augmentable else 0
for lang in langs_pvts
]
)
return {
"search-stats": {
"num_nodes_searched": SearchNode.ctr_eval,
"time-taken": t1 - t0,
"budget": budget,
"used_budget": sum(best.sizes) if best != None else 0,
},
"search-perfs": {
"baseline-perf": AvgPerf(orig_sizes),
"augmented-perf": best.Perf() if best != None else 0,
"max-perf": AvgPerf(np.array(orig_sizes) + np.array(theoretic_max_case)),
"equal-aug-perf": AvgPerf(equal_aug_sizes),
},
"lang-perfs": {
"before-aug": {
tgt: score for tgt, score in zip(langs_tgts, TgtPerfs(orig_sizes))
},
"after-aug": {
tgt: score
for tgt, score in zip(
langs_tgts, TgtPerfs(np.array(orig_sizes) + np.array(best.sizes))
)
}
if best != None
else {},
"equal-aug": {
tgt: score for tgt, score in zip(langs_tgts, TgtPerfs(equal_aug_sizes))
},
},
"augments": SimpleAugSet(best.sizes) if best != None else {},
"ablation": {
langs_pvts[idx]: best.Augment(idx).Perf() - best.Perf()
for idx, size in enumerate(best.sizes)
if size > 0
}
if best != None
else {},
}
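# Invocation sketch for find_suggestions (argument values are hypothetical, and
# pivot_sizes stands for a per-language array of existing pivot data sizes; the
# string arguments use the same "lang:value" comma-separated format parsed above):
#
#   find_suggestions(args, model, featurizer, langs_list,
#                    budget=100000, lang_budgets="hi:20000",
#                    targets="hi,sw", pivots=pivot_sizes, augmentable="hi,sw",
#                    weights="", data_sizes=data_sizes, is_exp_grid=True,
#                    objective="avg", min_perf="", min_lang_perf="")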
"""
Returns predicted performance in targets for diff pivot data_sizes
"""
def get_user_config_perfs(model, featurizer, langs_list, targets, data_sizes):
# Parse params
# pdb.set_trace()
targets = set(targets)
langs_tgts = [lang for lang in langs_list if lang in targets]
langs_pvts = langs_list
# Helpers
TgtPerfs = lambda sizes: model.predict(
featurizer.featurize(langs_tgts, langs_pvts, sizes)
)
AvgPerf = lambda sizes: np.average(TgtPerfs(sizes))
SimpleAugSet = lambda sizes: [
(lang, sizes[idx]) for idx, lang in enumerate(langs_pvts) if sizes[idx] > 0
]
# Add tgt-score-distrib for best performing user-specified config
best_user_config, best_user_config_idx, _ = max(
[
(user_config, row_idx, AvgPerf(user_config))
for row_idx, user_config in enumerate(data_sizes.tolist())
],
key=lambda x: x[2],
)
best_user_config_perfs = TgtPerfs(best_user_config)
tgt_perfs = [
TgtPerfs(user_config) for row_idx, user_config in enumerate(data_sizes.tolist())
]
return {
"best-config-idx": best_user_config_idx,
"best-config": SimpleAugSet(best_user_config),
"best-tgt-perfs": list(zip(langs_tgts, best_user_config_perfs)),
"tgt_perfs": tgt_perfs,
}
"""
Plots heatmap of predicted performance in langs_tgts for given data_sizes
"""
def plot_perf_heatmap(
langs_list, langs_pivot, langs_tgts, data_sizes, model, featurizer, output_dir
):
langs_tgts = [t for t in langs_tgts if t in langs_list]
Y = build_acc_matrix(langs_pivot, langs_tgts, featurizer, model, data_sizes)
print("vsize", data_sizes.shape[0])
fig = plt.figure(figsize=(6, 3), dpi=300)
plt.rc("xtick", labelsize=10)
plt.rc("ytick", labelsize=10)
ax = sns.heatmap(
Y,
cmap="RdYlGn",
cbar=True,
cbar_kws={"orientation": "horizontal"},
yticklabels=[
"Config-%d" % (ConfigIdx + 1) for ConfigIdx in range(len(data_sizes))
],
xticklabels=langs_tgts,
)
plt.setp(ax.get_yticklabels(), rotation=0, ha="right", rotation_mode="anchor")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
pltfile = "{}.png".format(int(time.time()))
if output_dir:
pltfile = os.path.join(output_dir, pltfile)
plt.savefig(pltfile, bbox_inches="tight")
return pltfile, Y
def litmus_main(args):
"""
Prepare predictive model
"""
assert args.load_state or args.scores_file
if args.load_state:
# Load trained model + metadata
with open(args.load_state, "rb") as pkl_f:
(
model,
featurizer,
langs_list,
langs_pivot,
langs_target,
avg_error,
baseline_error,
) = pickle.load(pkl_f)
else:
# Prepare data
(
model,
langs_list,
feat_names,
featurizer,
common_feats,
X,
Y,
langs_pivot,
langs_target,
examples,
) = prepare_data(args)
# Train prediction model
(
model,
(avg_error, std_error),
(baseline_error, baseline_std),
errors,
examples_indices,
predictions,
) = train_model(
X, Y, feat_names, common_feats, langs_pivot, langs_target, examples, args
)
if args.save_state:
with open(args.save_state, "wb") as pkl_f:
pickle.dump(
[
model,
featurizer,
langs_list,
langs_pivot,
langs_target,
avg_error,
baseline_error,
],
pkl_f,
)
ret_val = {
"error": avg_error, # Average Error Computed by specified Method
"std_error": std_error, # Standard Deviation of Predictor's Error
"langs_pivot": langs_pivot, # Pivot Languages
"langs_target": langs_target, # Target Languages
"baseline_error": baseline_error, # Error when using Mean Baseline Method instead of training model
"baseline_std_error": baseline_std, # Std of Error of mean baseline
}
"""
Inference using trained model
"""
if args.mode:
data_sizes, langs_tgts = prepare_inf_data(args, langs_list)
# Set baseline perfs for all target modes
ret_val["user-config-perfs"] = get_user_config_perfs(
model, featurizer, langs_list, langs_tgts, data_sizes
)
pprint(ret_val["user-config-perfs"])
if "heatmap" in args.mode:
pltfile, Y = plot_perf_heatmap(
langs_list,
langs_pivot,
langs_tgts,
data_sizes,
model,
featurizer,
args.output_dir,
)
ret_val["heatmapFile"] = pltfile
ret_val["acc_matrix"] = {"index": langs_list, "matrix": Y.tolist()}
if "suggestions" in args.mode:
# Get baseline row for pivot sizes
pivot_row_idx = (
int(args.suggestions_pivots)
if args.suggestions_pivots != ""
else ret_val["user-config-perfs"]["best-config-idx"]
)
pivot_sizes = data_sizes[pivot_row_idx]
ret_val["suggestions"] = find_suggestions(
args,
model,
featurizer,
langs_list,
args.suggestions_budget,
args.suggestions_langbudget,
args.suggestions_targets,
pivot_sizes,
args.suggestions_augmentable,
args.suggestions_weights,
data_sizes,
args.suggestions_grid == "exponential",
args.suggestions_objective,
args.suggestions_minperf,
args.suggestions_minlangperf,
)
ret_val["suggestions"]["suggestions_row"] = pivot_row_idx
pprint(ret_val["suggestions"])
return ret_val
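# Command-line sketch (file names and language codes are placeholders):
#   python litmus_mixing.py xlmr --scores_file scores.json --error_method kfold \
#       --mode heatmap --data_sizes "en:10000;en:20000" --heatmap_targets "hi,sw"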
def parse_args(args):
parser = argparse.ArgumentParser("LITMUS Tool")
# Options for loading training data / model
parser.add_argument(
"model_name",
type=str,
default="xlmr",
help="name of model to use",
choices=SUPPORTED_MODELS,
)
parser.add_argument(
"--scores_file",
default=None,
type=str,
help="path of json file containing scores to train on",
)
parser.add_argument(
"--train_format",
default="json",
help="Format of the training data",
choices=["json", "csv"],
)
parser.add_argument(
"--save_state",
default=None,
type=str,
help="Save state of training of model to pickle file",
)
parser.add_argument(
"--load_state",
default=None,
type=str,
help="Load trained model from pickle file",
)
parser.add_argument(
"--test_file",
default=None,
type=str,
help="Test file to generate prediction. If `None` then no predictions are made",
)
# Feature options
parser.add_argument(
"--precomputed_features",
type=str,
default=PRECOMPUTED_FEATURE_FILE,
help="Path to precomputed-features file.",
)
parser.add_argument(
"--pivot_features",
type=str,
default="none",
choices=["none", "all", "data_only"],
help="What features based on pivot langs to use",
)
parser.add_argument(
"--use_all_langs",
action="store_true",
help="Add features based on all langs the tool supports (Needed for transfer)",
)
parser.add_argument(
"--common_scaling",
action="store_true",
help="Common min max scaling params that are pvt dependent(data size, type overlap, distance)",
)
# Model training options
parser.add_argument(
"--training_algorithm",
type=str,
default="xgboost",
help="which regressor to use",
choices=["xgboost", "mlp"],
)
parser.add_argument(
"--error_method",
type=str,
default="split",
choices=["LOO", "LOTO", "LOLO", "LOCO", "split", "kfold", "manual_split"],
)
parser.add_argument(
"--dont_print",
action="store_false",
dest="print",
help="disable any form of printing",
)
# Experimental Options
parser.add_argument(
"--train_indices",
default=None,
help="train indices if error_method is manual_split",
)
parser.add_argument(
"--test_indices",
default=None,
help="test indices if error_method is manual_split",
)
parser.add_argument(
"--model", default=None, help="Model to load"
) # hack - for calling from external script using actual model
parser.add_argument(
"--load_model",
type=str,
default=None,
help="Load neural network model from saved file",
)
parser.add_argument(
"--save_model", type=str, default=None, help="Save neural network model to file"
)
# Options for inferencing
parser.add_argument(
"--data_sizes",
default="",
help="Pivot data-size configs (semi-colon separated configs, each config itself being comma-separated key-value pairs)",
)
parser.add_argument(
"--mode",
type=str,
default=None,
nargs="+",
help="Output modes (comma-separated). Choose from following: {heatmap, suggestions}.",
)
# Options for heatmap comparison of multiple user-configs
parser.add_argument(
"--output_dir", type=str, default=None, help="Overrride output directory"
)
parser.add_argument(
"--heatmap_targets",
type=str,
default=None,
help="Targets for heatmap. Overrides suggestions_targets (which is used by deafult)",
)
# Options for suggestions finding
parser.add_argument(
"--suggestions_budget",
type=int,
default=0,
help="Budget for finding suggestions of which languages to add data for (0 to disable)",
)
parser.add_argument(
"--suggestions_langbudget",
type=str,
default="",
help="Language-specific budget for finding suggestions (overrrides suggestions_budget for these langs, comma-separated list of key:value pairs)",
)
parser.add_argument(
"--suggestions_targets",
type=str,
default="",
help="Targets being considered (comma-separated)",
)
parser.add_argument(
"--suggestions_weights",
type=str,
default="",
help="Target weights for avg perf objective (comma-separated list of key:value pairs, default wt=1)",
)
parser.add_argument(
"--suggestions_pivots",
type=str,
default="",
help="Index of desired row in data_sizes",
)
parser.add_argument(
"--suggestions_augmentable",
type=str,
default="",
help="Set of augmentable languages (comma-separated)",
)
parser.add_argument(
"--suggestions_grid",
type=str,
default="exponential",
choices=["exponential", "linear"],
help="Search space grid to use for suggestions",
)
parser.add_argument(
"--suggestions_objective",
type=str,
default="avg",
help="Objective function to be used for finding suggestions",
choices=["avg", "min"],
)
parser.add_argument(
"--suggestions_minperf",
type=str,
default="",
help="Minimum acceptable average performance across tgts",
)
parser.add_argument(
"--suggestions_minlangperf",
type=str,
default="",
help="Minimum acceptable performance for given tgts (comma-separated list of key:value pairs)",
)
parser.add_argument(
"--suggestions_verbose", action="store_true", help="Verbose logging of search"
)
return parser.parse_args(args)
if __name__ == "__main__":
args = parse_args(sys.argv[1:])
litmus_main(args)