# Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
def sentence_to_vectors(sentence):
    """Convert *sentence* into a dlib.vectors of 1-D word features.

    Each whitespace-separated word contributes one single-element
    vector whose value is 1 when the word's first letter is uppercase
    and 0 otherwise.
    """
    vects = dlib.vectors()
    for token in sentence.split():
        # str.split() never yields empty strings, so token[0] is safe.
        flag = 1 if token[0].isupper() else 0
        vects.append(dlib.vector([flag]))
    return vects
def sentence_to_vectors(sentence):
    """Map a sentence to a dlib.vectors of simple capitalization features.

    For every word, append a one-dimensional dlib.vector holding 1 if
    the word begins with a capital letter, else 0.
    """
    feature_flags = (int(word[0].isupper()) for word in sentence.split())
    vects = dlib.vectors()
    for value in feature_flags:
        vects.append(dlib.vector([value]))
    return vects
def predict_gender(encoding):
    """Score *encoding* with the module-level _classifier.

    Returns "male" for scores above 0.5, "female" for scores below
    -0.5, and "unknown" for anything in between.
    """
    score = _classifier(dlib.vector(encoding))
    if score > 0.5:
        return "male"
    return "female" if score < -0.5 else "unknown"
# is always a vector, but deciding what to put into it to solve your
# problem is often not a trivial task. Part of the difficulty is that
# you need an efficient method for finding the label that makes
# dot(w,PSI(x,label)) the biggest. Sometimes this is easy, but often
# finding the max scoring label turns into a difficult combinatorial
# optimization problem. So you need to pick a PSI that doesn't make the
# label maximization step intractable but also still well models your
# problem.
#
# Create a dense vector object (note that you can also use unsorted
# sparse vectors (i.e. dlib.sparse_vector objects) to represent your
# PSI vector. This is useful if you have very high dimensional PSI
# vectors that are mostly zeros. In the context of this example, you
# would simply return a dlib.sparse_vector at the end of make_psi() and
# the rest of the example would still work properly. ).
# NOTE(review): this is a fragment of a make_psi-style method — the
# enclosing `def` is outside this view, so `self`, `x`, and `label`
# are bound by it. It builds the joint feature vector PSI(x, label):
# x is copied into the slot [label*dims, (label+1)*dims) of a
# zero-initialized vector of self.num_dimensions entries, presumably
# with num_dimensions == 3 * len(x) — TODO confirm against the class.
psi = dlib.vector()
# Set it to have 9 dimensions. Note that the elements of the vector
# are 0 initialized.
psi.resize(self.num_dimensions)
dims = len(x)
if label == 0:
    # label 0 occupies the first dims slots.
    for i in range(0, dims):
        psi[i] = x[i]
elif label == 1:
    # label 1 occupies the second dims slots.
    for i in range(dims, 2 * dims):
        psi[i] = x[i - dims]
else:  # the label must be 2
    for i in range(2 * dims, 3 * dims):
        psi[i] = x[i - 2 * dims]
return psi
# Build training/testing data for dlib's SVM-Rank from pre-extracted
# feature CSVs. `train_data`, `settings`, and `pd` come from earlier in
# the file (outside this view).
doc_id_train = train_data["doc_id"].tolist()
train_features = train_data[settings.feature_selected]
# train_true = list(train_data["label"])
train_true = train_data["label"].tolist()
# testing
test_data = pd.read_csv('test_features.csv', index_col=0, encoding="ISO-8859-1")
query_id_test = test_data["query_id"].tolist()
doc_id_test = test_data["doc_id"].tolist()
test_features = test_data[settings.feature_selected]
test_true = test_data["label"]
# Split training rows into relevant (label 1) / non-relevant (label 0)
# example pools; any other label value is silently skipped.
data = dlib.ranking_pair()
for i in range(len(train_true)):
    # NOTE(review): if settings.feature_selected is a list of columns,
    # train_features is a DataFrame and train_features[i] indexes a
    # COLUMN named i, not row i — row access would be
    # train_features.iloc[i]. Confirm feature_selected's type.
    if train_true[i] == 1:
        data.relevant.append(dlib.vector(train_features[i]))
    elif train_true[i] == 0:
        data.nonrelevant.append(dlib.vector(train_features[i]))
# Train SVM-Rank; c trades off fitting the training data vs. a simpler
# solution that may generalize better.
trainer = dlib.svm_rank_trainer()
trainer.c = 10
rank = trainer.train(data)
print("Ranking score for a relevant vector: {}".format(
    rank(data.relevant[0])))
print("Ranking score for a non-relevant vector: {}".format(
    rank(data.nonrelevant[0])))
# Duplicate of the ranking-setup sequence above in this file: load test
# features and train an SVM-Rank model. `train_data`, `settings`, and
# `pd` come from earlier in the file (outside this view).
# train_true = list(train_data["label"])
train_true = train_data["label"].tolist()
# testing
test_data = pd.read_csv('test_features.csv', index_col=0, encoding="ISO-8859-1")
query_id_test = test_data["query_id"].tolist()
doc_id_test = test_data["doc_id"].tolist()
test_features = test_data[settings.feature_selected]
test_true = test_data["label"]
# Partition training rows into relevant (label 1) / non-relevant
# (label 0) pools; other label values are skipped.
data = dlib.ranking_pair()
for i in range(len(train_true)):
    # NOTE(review): train_features[i] selects a column when
    # train_features is a DataFrame — row access would be
    # train_features.iloc[i]. Verify settings.feature_selected's type.
    if train_true[i] == 1:
        data.relevant.append(dlib.vector(train_features[i]))
    elif train_true[i] == 0:
        data.nonrelevant.append(dlib.vector(train_features[i]))
# Train SVM-Rank with regularization parameter c = 10.
trainer = dlib.svm_rank_trainer()
trainer.c = 10
rank = trainer.train(data)
print("Ranking score for a relevant vector: {}".format(
    rank(data.relevant[0])))
print("Ranking score for a non-relevant vector: {}".format(
    rank(data.nonrelevant[0])))
# run compile_dlib_python_module.bat. This should work on any operating
# system so long as you have CMake and boost-python installed.
# On Ubuntu, this can be done easily by running the command:
# sudo apt-get install libboost-python-dev cmake
import dlib
# Now let's make some testing data. To make it really simple, let's suppose
# that we are ranking 2D vectors and that vectors with positive values in the
# first dimension should rank higher than other vectors. So what we do is make
# examples of relevant (i.e. high ranking) and non-relevant (i.e. low ranking)
# vectors and store them into a ranking_pair object like so:
data = dlib.ranking_pair()
# Here we add two examples. In real applications, you would want lots of
# examples of relevant and non-relevant vectors.
data.relevant.append(dlib.vector([1, 0]))
data.nonrelevant.append(dlib.vector([0, 1]))
# Now that we have some data, we can use a machine learning method to learn a
# function that will give high scores to the relevant vectors and low scores to
# the non-relevant vectors.
trainer = dlib.svm_rank_trainer()
# Note that the trainer object has some parameters that control how it behaves.
# For example, since this is the SVM-Rank algorithm it has a C parameter that
# controls the trade-off between trying to fit the training data exactly or
# selecting a "simpler" solution which might generalize better.
trainer.c = 10
# So let's do the training; `rank` is a callable scoring function used
# below this excerpt.
rank = trainer.train(data)
# Now if you call rank on a vector it will output a ranking score. In