Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
while index < len(args):
filename = args[index]
filename = args[index]
print("filename", filename)
dataset = None
if filename.startswith("cluster://"):
dataset = vaex.open(filename) # , thread_mover=self.call_in_main_thread)
elif filename.startswith("http://") or filename.startswith("ws://"): # TODO: think about https and wss
# o = urlparse(filename)
# assert o.scheme == "http"
# base_path, should_be_datasets, dataset_name = o.path.rsplit("/", 2)
# if should_be_datasets != "datasets":
# error("expected an url in the form http://host:port/optional/part/datasets/dataset_name")
# server = vaex.server(hostname=o.hostname, port = o.port or 80, thread_mover=self.call_in_main_thread, base_path=base_path)
if 0:
server = vaex.server(filename, thread_mover=self.call_in_main_thread)
datasets = server.datasets()
names = [dataset.name for dataset in datasets]
index += 1
if index >= len(args):
error("expected dataset to follow url, e.g. vaex http://servername:9000 somedataset, possible dataset names: %s" % " ".join(names))
name = args[index]
if name not in names:
error("no such dataset '%s' at server, possible dataset names: %s" % (name, " ".join(names)))
found = [dataset for dataset in datasets if dataset.name == name]
if found:
dataset = found[0]
dataset = vaex.open(filename, thread_mover=self.call_in_main_thread)
self.add_recently_opened(filename)
# dataset = self.open(filename)
elif filename[0] == ":": # not a filename, but a classname
"""
import vaex
try:
if path in aliases:
path = aliases[path]
if path.startswith("http://") or path.startswith("ws://"): # TODO: think about https and wss
server, name = path.rsplit("/", 1)
url = urlparse(path)
if '?' in name:
name = name[:name.index('?')]
extra_args = {key: values[0] for key, values in parse_qs(url.query).items()}
if 'token' in extra_args:
kwargs['token'] = extra_args['token']
if 'token_trusted' in extra_args:
kwargs['token_trusted'] = extra_args['token_trusted']
server = vaex.server(server, **kwargs)
dataframe_map = server.datasets(as_dict=True)
if name not in dataframe_map:
raise KeyError("no such DataFrame '%s' at server, possible names: %s" % (name, " ".join(dataframe_map.keys())))
return dataframe_map[name]
if path.startswith("cluster"):
import vaex.distributed
return vaex.distributed.open(path, *args, **kwargs)
else:
import vaex.file
import glob
if isinstance(path, six.string_types):
paths = [path]
else:
paths = path
filenames = []
for path in paths:
self.signal_end.emit()
# if new tasks were added as a result of this, execute them immediately
# TODO: we may want to include infinite recursion protection
self._is_executing = False
if len(self.task_queue) > 0:
logger.debug("task queue not empty.. start over!")
self.execute()
finally:
self._is_executing = False
if __name__ == "__main__":
    # Manual smoke test. Usage: <script> HOST PORT
    import sys

    import vaex

    vaex.set_log_level_debug()

    host = sys.argv[1]
    port = int(sys.argv[2])
    remote = vaex.server(host, port=port)
    remote_datasets = remote.datasets()
    print(remote_datasets)

    # The first remote dataset is fetched and then immediately replaced by
    # vaex.example(); every statistic below is computed on the latter.
    dataset = remote_datasets[0]
    dataset = vaex.example()

    full_minmax = dataset("x").minmax()
    print(full_minmax)

    dataset.select("x < 0")
    n_selected = dataset.selected_length()
    n_total = len(dataset)
    print(n_selected, n_total)

    masked = dataset("x").selected().is_masked
    print(masked)
    selected_minmax = dataset("x").selected().minmax()
    print(selected_minmax)
__author__ = 'breddels'
import vaex as vx
import numpy as np
import pylab
# Demo script: connect to the vaex server on localhost, open the first entry
# it lists and print a few statistics of the ("x", "y") subspace.
server = vx.server("localhost")
# Bound to `available_datasets` rather than `list` so the builtin of that
# name is not shadowed for the rest of the module.
available_datasets = server.list_datasets()
print(available_datasets)
ds = server.open(available_datasets[0])
# print() calls instead of Python 2 print statements, which are a
# SyntaxError on Python 3.
print("length", len(ds))
subspace = ds("x", "y")
# `limits` and `selected` are not used further down; the calls are kept so
# the script still exercises limits_sigma(...) and selected().
limits = subspace.limits_sigma(sigmas=3, square=True)
ds.select("z>50")
selected = subspace.selected()
print(subspace.mean())
print(subspace.var())
print(subspace.limits_sigma())
# Pick the log level by index; min(3, ...) clamps larger values of
# args.verbose to the last entry, "DEBUG".
verbosity = ["ERROR", "WARNING", "INFO", "DEBUG"]
logging.getLogger("vaex").setLevel(verbosity[min(3, args.verbose)])
quiet = args.quiet
if args.task == "check":
    # "check" task: try to connect to every host stored for the named
    # cluster, print the datasets each one serves, then print the
    # cluster:// urls that can be used to open them.
    name = args.name
    clusterlist = vaex.settings.cluster.get("clusters." + name, None)
    if clusterlist is None:
        if not quiet:
            print("cluster does not exist: %s" % name)
    else:
        # NOTE(review): despite its name, `common` accumulates the *union*
        # of the dataset names seen across hosts -- confirm that is intended.
        common = None
        # Iterate over a snapshot: with args.clean set, unreachable hosts are
        # removed from clusterlist inside the loop, and removing from the
        # list being iterated would silently skip the host that follows.
        for hostname in list(clusterlist):
            print(hostname)
            try:
                server = vx.server(hostname)
                datasets = server.datasets()
            except socket.error as e:
                print("\t" + str(e))
                if args.clean:
                    clusterlist.remove(hostname)
            else:
                for dataset in datasets:
                    print("\t" + dataset.name)
                names = {k.name for k in datasets}
                common = names if common is None else common.union(names)
        print("Cluster: " + name + " has %d hosts connected, to connect to a dataset, use the following urls:" % (len(clusterlist)))
        # `common` is still None when not a single host could be reached;
        # fall back to an empty sequence instead of raising TypeError.
        for dsname in common or ():
            print("\tcluster://%s/%s" % (name, dsname))
        if args.clean:
            # Persist the host list with the unreachable hosts removed.
            vaex.settings.cluster.store("clusters." + name, clusterlist)
def open(url, thread_mover=None):
    """Open one dataset from every reachable host of a configured cluster.

    The url must use the ``cluster`` scheme. Its host part is the cluster
    name, looked up as ``"clusters.<name>"`` in ``vaex.settings.cluster``,
    and its path (without the leading slash) is the key of the dataset to
    take from each host.

    :param url: url string with scheme ``cluster``
    :param thread_mover: passed through unchanged to ``vx.server``
    :return: a ``DatasetDistributed`` built from the datasets of all hosts
        that could be reached, or None when no (or an empty) host list is
        stored for the cluster
    :raises KeyError: when a reachable host has no dataset under that key
    """
    parsed = urlparse(url)
    assert parsed.scheme in ["cluster"]
    port = parsed.port  # not used below; the attribute access itself is kept
    dataset_key = parsed.path[1:] if parsed.path.startswith("/") else parsed.path
    settings_key = "clusters." + parsed.hostname
    hosts = vaex.settings.cluster.get(settings_key, None)
    if not hosts:
        return None

    parts = []
    for host in hosts:
        try:
            remote = vx.server(host, thread_mover=thread_mover)
            by_name = remote.datasets(as_dict=True)
        except socket.error:
            # Hosts that cannot be reached are left out of the result.
            logger.info("could not connect to %s, skipping", host)
            continue
        parts.append(by_name[dataset_key])
    return DatasetDistributed(datasets=parts)
def server_connect(*ignore):
    """Let the user pick a server and one of its datasets, then add it.

    Shows a dialog with the server urls stored under "servers" in
    ``vaex.settings.main``, connects to the chosen one, shows a second
    dialog listing that server's datasets, and adds the chosen dataset to
    ``self.dataset_selector`` and to the recently-opened list. On success
    the chosen server url is moved to the front of the stored list.

    Positional arguments are accepted and ignored. ``self`` is taken from
    the enclosing scope. Returns None in all cases; it returns early when a
    dialog yields None or when connecting fails (after showing an error
    dialog).
    """
    servers = vaex.settings.main.get("servers", ["ws://localhost:9000/"])
    choice = dialogs.choose(self, "Connect to server", "Connect to server", servers, editable=True)
    # Test for None *before* converting to str: str(None) is the string
    # "None", so the check could never fire when done after the conversion
    # and the code would go on to connect to a server called "None".
    if choice is None:
        return
    server = str(choice)
    try:
        vaex_server = vaex.server(server, thread_mover=self.call_in_main_thread)
        datasets = vaex_server.datasets()
    except Exception as e:
        # UI boundary: report any connection problem in a dialog.
        dialogs.dialog_error(self, "Error connecting", "Error connecting: %r" % e)
        return
    dataset_descriptions = ["{} ({:,} rows)".format(dataset.name, len(dataset)) for dataset in datasets]
    dataset_index = dialogs.choose(self, "Choose datasets", "Choose dataset", dataset_descriptions)
    if dataset_index is None:
        return
    dataset = datasets[dataset_index]
    self.dataset_selector.add(dataset)
    self.add_recently_opened(dataset.path)
    # Move the url to the front of the stored list, without duplicating it.
    if server in servers:
        servers.remove(server)
    servers.insert(0, server)
    vaex.settings.main.store("servers", servers)