Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_sql_building(self):
    # Build a query that calls the UDF `foo` on a table placeholder, then
    # check the SQL produced by the query and the JavaScript held by the UDF.
    ctx = self._create_context()
    source_table = datalab.bigquery.Table('test:requestlogs.today', context=ctx)
    udf = self._create_udf()
    query = datalab.bigquery.Query(
        'SELECT * FROM foo($t)', t=source_table, udfs=[udf], context=ctx)

    # Expected values are spelled out up front so the assertions read cleanly.
    expected_sql = ('SELECT * FROM '
                    '(SELECT output1, output2 FROM foo([test:requestlogs.today]))')
    expected_js = (
        '\nfoo=function(r,emit) { emit({output1: r.field2, output2: r.field1 }); };\n'
        'bigquery.defineFunction(\'foo\', ["field1", "field2"], '
        '[{"name": "output1", "type": "integer"}, '
        '{"name": "output2", "type": "string"}], foo);'
    )

    self.assertEqual(query.sql, expected_sql)
    self.assertEqual(udf._code, expected_js)
def _create_query(sql=None):
    """Build a Query bound to a test context.

    Args:
      sql: the SQL text for the query; when None, 'SELECT * ...' is used.
    Returns:
      A datalab.bigquery.Query created with TestCases._create_context().
    """
    query_text = 'SELECT * ...' if sql is None else sql
    return datalab.bigquery.Query(query_text, context=TestCases._create_context())
Args:
data: Can be one of:
A SQL query string.
A SQL query module defined by "%%sql --module module_name".
A pandas DataFrame.
Regardless of data type, it must include the following columns:
"feature": identifies a slice of features. For example: "petal_length:4.0-4.2".
"count": number of instances in that slice of features.
All other columns are viewed as metrics for its feature slice. At least one is required.
"""
import IPython
if isinstance(data, ModuleType) or isinstance(data, basestring):
item, _ = datalab.data.SqlModule.get_sql_statement_with_environment(data, {})
query = datalab.bigquery.Query(item)
df = query.results().to_dataframe()
data = self._get_lantern_format(df)
elif isinstance(data, pd.core.frame.DataFrame):
data = self._get_lantern_format(data)
else:
raise Exception('data needs to be a sql query, or a pandas DataFrame.')
HTML_TEMPLATE = """
def _get_gcs_csv_row_count(self, federated_table):
    """Count the rows of a federated table by running a count(*) query over it.

    Args:
      federated_table: the data source bound to the name 'data' in the query.
    Returns:
      The first value of the first result row, i.e. the count(*) column.
    """
    import datalab.bigquery as bq
    results = bq.Query('SELECT count(*) from data',
                       data_sources={'data': federated_table}).results()
    # values() is a list on Python 2 but a non-subscriptable view on Python 3,
    # so materialize it before indexing; this works on both versions.
    # NOTE(review): assumes each result row is dict-like -- confirm.
    return list(results[0].values())[0]
args: the dictionary of magic arguments.
cell: the cell contents which can be variable value overrides (if args has a 'query'
value) or inline SQL otherwise.
env: a dictionary that is used for looking up variable values.
Returns:
A Query object.
"""
sql_arg = args.get('query', None)
if sql_arg is None:
# Assume we have inline SQL in the cell
if not isinstance(cell, basestring):
raise Exception('Expected a --query argument or inline SQL')
return datalab.bigquery.Query(cell, values=env)
item = datalab.utils.commands.get_notebook_item(sql_arg)
if isinstance(item, datalab.bigquery.Query): # Queries are already expanded.
return item
# Create an expanded BQ Query.
config = datalab.utils.commands.parse_config(cell, env)
item, env = datalab.data.SqlModule.get_sql_statement_with_environment(item, config)
if cell:
env.update(config) # config is both a fallback and an override.
return datalab.bigquery.Query(item, values=env)
DEFINE QUERY
on a line by itself.
Args:
args: the optional arguments following '%%sql'.
cell: the contents of the cell; Python code for arguments followed by SQL queries.
"""
name = args['module'] if args['module'] else '_sql_cell'
module = imp.new_module(name)
query = _split_cell(cell, module)
ipy = IPython.get_ipython()
if not args['module']:
# Execute now
if query:
return datalab.bigquery.Query(query, values=ipy.user_ns) \
.execute(dialect=args['dialect'], billing_tier=args['billing']).results
else:
# Add it as a module
sys.modules[name] = module
exec('import %s' % name, ipy.user_ns)