Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
session=[],
prior_probs=self.data3["prior_probs"],
trans_probs=self.data3["trans_probs"],
param_cond_cmd_probs=self.data3["param_cond_cmd_probs"],
value_cond_param_probs=self.data3["value_cond_param_probs"],
modellable_params=set(),
window_len=1,
use_start_end_tokens=True,
start_token=START_TOKEN,
end_token=END_TOKEN,
use_geo_mean=False,
)
self.assertListEqual(actual, [0.25])
actual = cmds_params_values.compute_likelihood_windows_in_session(
session=[Cmd("Set-User", {"Identity": "blah"})],
prior_probs=self.data3["prior_probs"],
trans_probs=self.data3["trans_probs"],
param_cond_cmd_probs=self.data3["param_cond_cmd_probs"],
value_cond_param_probs=self.data3["value_cond_param_probs"],
modellable_params=set(),
window_len=1,
use_start_end_tokens=False,
start_token=START_TOKEN,
end_token=END_TOKEN,
use_geo_mean=False,
)
self.assertListEqual(actual, [0.22787717886202657])
prior_probs=self.data3["prior_probs"],
trans_probs=self.data3["trans_probs"],
param_cond_cmd_probs=self.data3["param_cond_cmd_probs"],
value_cond_param_probs=self.data3["value_cond_param_probs"],
modellable_params=set(),
window_len=1,
use_start_end_tokens=True,
start_token=START_TOKEN,
end_token=END_TOKEN,
use_geo_mean=False,
)
self.assertListEqual(actual[0], [])
self.assertEqual(actual[1], 0.25)
actual = cmds_params_values.rarest_window_session(
session=[Cmd("Set-User", {"City": "york"}), Cmd("drfjh", {})],
prior_probs=self.data3["prior_probs"],
trans_probs=self.data3["trans_probs"],
param_cond_cmd_probs=self.data3["param_cond_cmd_probs"],
value_cond_param_probs=self.data3["value_cond_param_probs"],
modellable_params=set(),
window_len=3,
use_start_end_tokens=False,
start_token=START_TOKEN,
end_token=END_TOKEN,
use_geo_mean=False,
)
self.assertListEqual(actual[0], [])
self.assertTrue(np.isnan(actual[1]))
actual = cmds_params_values.rarest_window_session(
START_TOKEN: {UNK_TOKEN: 1},
END_TOKEN: {UNK_TOKEN: 1},
UNK_TOKEN: {UNK_TOKEN: 1},
}
self.data2["value_counts"] = defaultdict(lambda: 0)
self.data2["value_counts_ls"] = {UNK_TOKEN: 1}
self.data2["param_value_counts"] = defaultdict(lambda: defaultdict(lambda: 0))
self.data2["param_value_counts_ls"] = {UNK_TOKEN: {UNK_TOKEN: 1}}
self.data2["cmds"] = [START_TOKEN, END_TOKEN, UNK_TOKEN]
self.data2["params"] = [UNK_TOKEN]
# populate data3
cmd = "Set-User"
self.data3["sessions"] = [
[
Cmd(name="Set-User", params={"City": "york", "Identity": "blah"}),
Cmd(name="Set-User", params={"Identity": "blah"}),
]
]
self.data3["seq1_counts"] = defaultdict(
lambda: 0, {START_TOKEN: 1, cmd: 2, END_TOKEN: 1}
)
self.data3["seq1_counts_ls"] = {
UNK_TOKEN: 6,
START_TOKEN: 4,
END_TOKEN: 4,
cmd: 8,
}
self.data3["seq2_counts"] = defaultdict(lambda: defaultdict(lambda: 0))
self.data3["seq2_counts"][START_TOKEN][cmd] = 1
self.data3["seq2_counts"][cmd][cmd] = 1
self.data3["seq2_counts"][cmd][END_TOKEN] = 1
]
self.sessions2 = [
[
Cmd("Set-User", {"Identity"}),
Cmd("Set-User", {"Identity", "City", "Name"}),
],
[
Cmd("Set-Mailbox", {"Identity"}),
Cmd("Set-User", {"Identity", "City"}),
Cmd("Set-User", {"Identity"}),
],
]
self.sessions3 = [
[
Cmd("Set-User", {"Identity": "blah"}),
Cmd("Set-User", {"Identity": "haha", "City": "york", "Name": "bob"}),
],
[
Cmd("Set-Mailbox", {"Identity": "blah"}),
Cmd("Set-User", {"Identity": "blah", "City": "london"}),
Cmd("Set-User", {"Identity": "haha"}),
],
]
self.times = [pd.datetime(2019, 3, 1), pd.datetime(2019, 5, 6)]
self.data1 = pd.DataFrame({"session": self.sessions1, "time": self.times})
self.data2 = pd.DataFrame({"session": self.sessions2, "time": self.times})
self.data3 = pd.DataFrame({"session": self.sessions3, "time": self.times})
self.data2["param_probs"] = StateMatrix({UNK_TOKEN: 0.3}, UNK_TOKEN)
self.data2["param_cond_cmd_probs"] = StateMatrix(
{
START_TOKEN: {UNK_TOKEN: 0.3333333333333333},
END_TOKEN: {UNK_TOKEN: 0.3333333333333333},
UNK_TOKEN: {UNK_TOKEN: 0.25},
},
UNK_TOKEN,
)
# populate data3
cmd = "Set-User"
self.data3["sessions"] = [
[
Cmd(name="Set-User", params={"City", "Identity"}),
Cmd(name="Set-User", params={"Identity"}),
]
]
self.data3["_seq1_counts"] = defaultdict(
lambda: 0, {START_TOKEN: 1, END_TOKEN: 1, cmd: 2}
)
self.data3["seq1_counts"] = StateMatrix(
{UNK_TOKEN: 6, START_TOKEN: 4, END_TOKEN: 4, cmd: 8}, UNK_TOKEN
)
self.data3["_seq2_counts"] = defaultdict(lambda: defaultdict(lambda: 0))
self.data3["_seq2_counts"][START_TOKEN][cmd] = 1
self.data3["_seq2_counts"][cmd][END_TOKEN] = 1
self.data3["_seq2_counts"][cmd][cmd] = 1
self.data3["seq2_counts"] = StateMatrix(
{
START_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 2},
UNK_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 1},
def setUp(self) -> None:
self.sessions1 = [
["Set-User", "Set-User"],
["Set-Mailbox", "Set-User", "Set-User"],
]
self.sessions2 = [
[
Cmd("Set-User", {"Identity"}),
Cmd("Set-User", {"Identity", "City", "Name"}),
],
[
Cmd("Set-Mailbox", {"Identity"}),
Cmd("Set-User", {"Identity", "City"}),
Cmd("Set-User", {"Identity"}),
],
]
self.sessions3 = [
[
Cmd("Set-User", {"Identity": "blah"}),
Cmd("Set-User", {"Identity": "haha", "City": "york", "Name": "bob"}),
],
[
Cmd("Set-Mailbox", {"Identity": "blah"}),
Cmd("Set-User", {"Identity": "blah", "City": "london"}),
Cmd("Set-User", {"Identity": "haha"}),
],
def setUp(self) -> None:
    """Build the session and timestamp fixtures shared by the tests.

    Three parallel fixtures are created, each holding two sessions:

    * ``sessions1`` - each session is a list of plain command-name strings.
    * ``sessions2`` - each session is a list of ``Cmd`` objects whose second
      argument is a set of parameter names.
    * ``sessions3`` - each session is a list of ``Cmd`` objects whose second
      argument is a dict mapping parameter names to values.

    ``times`` holds one timestamp per session.
    """
    self.sessions1 = [
        ["Set-User", "Set-User"],
        ["Set-Mailbox", "Set-User", "Set-User"],
    ]
    self.sessions2 = [
        [
            Cmd("Set-User", {"Identity"}),
            Cmd("Set-User", {"Identity", "City", "Name"}),
        ],
        [
            Cmd("Set-Mailbox", {"Identity"}),
            Cmd("Set-User", {"Identity", "City"}),
            Cmd("Set-User", {"Identity"}),
        ],
    ]
    self.sessions3 = [
        [
            Cmd("Set-User", {"Identity": "blah"}),
            Cmd("Set-User", {"Identity": "haha", "City": "york", "Name": "bob"}),
        ],
        [
            Cmd("Set-Mailbox", {"Identity": "blah"}),
            Cmd("Set-User", {"Identity": "blah", "City": "london"}),
            Cmd("Set-User", {"Identity": "haha"}),
        ],
    ]
    # pd.datetime was deprecated in pandas 1.0 and removed in 2.0.
    # pd.Timestamp is a datetime.datetime subclass, so code that expects
    # datetime objects keeps working.
    self.times = [pd.Timestamp(2019, 3, 1), pd.Timestamp(2019, 5, 6)]
Returns
-------
list of likelihoods
"""
if use_start_end_tokens:
if start_token is None or end_token is None:
raise MsticpyException(
"start_token and end_token should not be set to None when "
"use_start_end_tokens is set to True"
)
likelihoods = []
sess = session.copy()
if use_start_end_tokens and end_token:
sess += [Cmd(name=str(end_token), params=dict())]
end = len(sess) - window_len
for i in range(end + 1):
window = sess[i : i + window_len] # noqa E203
if i == 0:
use_start = use_start_end_tokens
else:
use_start = False
lik = compute_likelihood_window(
window=window,
prior_probs=prior_probs,
trans_probs=trans_probs,
param_cond_cmd_probs=param_cond_cmd_probs,
value_cond_param_probs=value_cond_param_probs,
modellable_params=modellable_params,
use_start_token=use_start,
use_end_token=False,
-------
List[float]
list of likelihoods
"""
if use_start_end_tokens:
if start_token is None or end_token is None:
raise MsticpyException(
"start_token and end_token should not be set to None when "
"use_start_end_tokens is set to True"
)
likelihoods = []
sess = session.copy()
if use_start_end_tokens and end_token:
sess += [Cmd(name=str(end_token), params={})]
end = len(sess) - window_len
for i in range(end + 1):
window = sess[i : i + window_len] # noqa E203
if i == 0:
use_start = use_start_end_tokens
else:
use_start = False
lik = compute_likelihood_window(
window=window,
prior_probs=prior_probs,
trans_probs=trans_probs,
param_cond_cmd_probs=param_cond_cmd_probs,
use_start_token=use_start,
use_end_token=False,