How to use the xarray.Dataset.from_dataframe function in xarray

To help you get started, we've selected a few examples of xarray.Dataset.from_dataframe, drawn from popular ways it is used in public projects.
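
Before the project snippets, here is the shape of the API itself: xarray.Dataset.from_dataframe turns the DataFrame's index into the Dataset's dimension coordinate and each column into a data variable. A minimal sketch with made-up weather data (the variable names are illustrative only):

import pandas as pd
import xarray as xr

# The (named) index becomes the Dataset's dimension coordinate;
# each column becomes a data variable along that dimension.
df = pd.DataFrame(
    {'temperature': [271.3, 272.1, 270.8],
     'pressure': [1012.0, 1011.5, 1013.2]},
    index=pd.date_range('2020-01-01', periods=3, name='time'))

ds = xr.Dataset.from_dataframe(df)
print(ds.dims)  # {'time': 3}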


github deeplycloudy / glmtools / glmtools / io / glm.py View on GitHub
                      ('lutevent_y', 'f8'),
                      ('lutevent_energy', 'f8'),
                      ('lutevent_count', 'f4'),
                      ('lutevent_flash_count', 'f4'),
                      ('lutevent_group_count', 'f4'),
                      ('lutevent_total_flash_area', 'f8'),
                      ('lutevent_total_group_area', 'f8'),
                      ('lutevent_time_offset', '
github jaws / jaws / jaws / gcnet2nc.py View on GitHub
def gcnet2nc(args, input_file, output_file, stations):
    df = init_dataframe(args, input_file)
    station_number = df['station_number'][0]
    df.drop('station_number', axis=1, inplace=True)

    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    surface_temp = extrapolate_temp(df)

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    month, day, hour, minutes, time, time_bounds, sza, az = get_time_and_sza(
        args, df, longitude, latitude)

    common.log(args, 4, 'Calculating quality control variables')
    fill_dataset_quality_control(df, ds, input_file)

    if args.no_drv_tm:
        pass
github ARVE-Research / gwgen / gwgen / parameterization.py View on GitHub
    def ds(self):
        """The dataset of the :attr:`data` DataFrame"""
        import xarray as xr
        ds = xr.Dataset.from_dataframe(self.data.set_index('wetf'))
        ds.wetf.attrs['long_name'] = 'Fraction of wet days'
        ds.p11.attrs['long_name'] = 'Prob. Wet then Wet'
        ds.p101.attrs['long_name'] = 'Prob. Wet then Dry then Wet'
        ds.p001.attrs['long_name'] = 'Prob. Dry then Dry then Wet'
        ds.p11.attrs['symbol'] = 'p_{11}'
        ds.p101.attrs['symbol'] = 'p_{101}'
        ds.p001.attrs['symbol'] = 'p_{001}'
        return ds
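
Two details of the gwgen snippet generalize well: whichever column you pass to set_index becomes the Dataset's dimension coordinate, and attributes such as long_name must be attached after conversion, because from_dataframe does not carry pandas metadata across. A minimal standalone sketch (the column values here are invented):

import pandas as pd
import xarray as xr

df = pd.DataFrame({'wetf': [0.1, 0.5, 0.9],
                   'p11': [0.2, 0.4, 0.6]})

# 'wetf' becomes the dimension coordinate; 'p11' a variable along it.
ds = xr.Dataset.from_dataframe(df.set_index('wetf'))
ds.p11.attrs['long_name'] = 'Prob. Wet then Wet'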
github jaredwo / obsio / obsio / providers / generic.py View on GitHub
            obs.columns = [col[1] for col in obs.columns.values]
            return obs
        
        elif data_structure == 'array':
            
            obs = obs.unstack(level=1)
            obs.columns = [col[1] for col in obs.columns.values]
            obs = xr.Dataset.from_dataframe(obs.swaplevel(0,1))
            
            if stn_ids is None:
                stns = self.stns
            else:
                stns = self.stns.loc[stn_ids]
            
            #include station metadata
            obs.merge(xr.Dataset.from_dataframe(stns), inplace=True)
            
            return obs
        
        else:
            
            raise ValueError("Unrecognized data format. Expected one of: "
                             "'stacked', 'tidy', 'array'")
github jaws / jaws / jaws / aaws2nc.py View on GitHub
def aaws2nc(args, input_file, output_file, stations):
    df = init_dataframe(args, input_file)
    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    time, time_bounds, sza = get_time_and_sza(
        args, input_file, latitude, longitude, df)[:3]

    if args.no_drv_tm:
        pass
    else:
        common.log(args, 5, 'Calculating month and day')
        year, month, day, hour, day_of_year = get_time_and_sza(
            args, input_file, latitude, longitude, df)[3:]
        ds['year'] = 'time', year
github ceholden / yatsm / yatsm / io / backends / _gdal.py View on GitHub
    def get_metadata(self, items=None):
        """ Return an xr.Dataset of metadata from the input image list

        Args:
            items (iterable): Subset of metadata column names (`self.extra_md`)
                to return

        Returns:
            xarray.Dataset: A Dataset containing the time series metadata
            with coordinate dimensions (time)

        """
        if not items:
            items = self.extra_md
        return xr.Dataset.from_dataframe(self.df[items])
github jaws / jaws / jaws / scar2nc.py View on GitHub
def scar2nc(args, input_file, output_file):
    """Main function to convert SCAR txt file to netCDF"""
    df, temperature_vars, pressure_vars, station_name, latitude, longitude, height, country, institution = init_dataframe(
        args, input_file)
    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Calculating time and sza')
    time, time_bounds, sza, day_of_year = get_time_and_sza(
        args, df, latitude, longitude)

    ds['day_of_year'] = 'time', day_of_year

    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude
    ds['height'] = tuple(), height
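
The tail of this converter is a compact tour of xarray's (dims, data) assignment syntax: a single dimension name attaches a 1-D variable, a tuple of names attaches an n-D variable, and an empty tuple makes a dimensionless scalar. The earlier drop('time') discards the raw index-derived time so a recomputed version can be reattached (Dataset.drop has since been superseded by drop_vars). A standalone sketch with made-up values:

import numpy as np
import xarray as xr

ds = xr.Dataset()

# 'time' -> 1-D variable, ('time', 'nbnd') -> 2-D variable,
# tuple() -> scalar (0-d) variable.
ds['sza'] = 'time', np.array([55.2, 60.1, 64.8])
ds['time_bounds'] = ('time', 'nbnd'), np.zeros((3, 2))
ds['latitude'] = tuple(), -77.85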
github ARVE-Research / gwgen / gwgen / main.py View on GitHub
        self.logger.debug('Calculating %s bias correction for experiment %s',
                          vname, self.experiment)
        postproc_dir = self.exp_config.setdefault(
            'postprocdir', osp.join(self.exp_config['expdir'], 'postproc'))
        if ds is None:
            df = pd.DataFrame(info[vname]).T
            try:
                # drop the 'All' row, keeping only the percentiles
                df.drop('All', inplace=True)
            except (ValueError, KeyError) as e:
                pass
            df.index.name = 'pctl'
            df.reset_index(inplace=True)
            df['unorm'] = stats.norm.ppf(
                df['pctl'].astype(float) / 100., 0, 1.0)
            ds = xr.Dataset.from_dataframe(df)

        # --- plots
        d = self.exp_config.setdefault('postproc', OrderedDict()).setdefault(
            'bias', OrderedDict()).setdefault(vname, OrderedDict())
        plot_output = plot_output or d.get('plot_output')
        if plot_output is None:
            plot_output = osp.join(
                postproc_dir, vname + '_bias_correction.pdf')

        project_output = osp.splitext(plot_output)[0] + '.pkl'
        nc_output = osp.splitext(plot_output)[0] + '.nc'

        d['plot_file'] = plot_output
        d['project_file'] = project_output
        d['nc_file'] = nc_output
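
One subtlety in the gwgen snippet: after reset_index(), the frame is left with an unnamed default RangeIndex, so the Dataset returned by from_dataframe gets a dimension literally named 'index', with pctl and unorm as ordinary variables along it. A condensed sketch of just that conversion step (the bias values are invented):

import pandas as pd
import xarray as xr
from scipy import stats

df = pd.DataFrame({'bias': [0.92, 1.0, 1.07]}, index=[25, 50, 75])
df.index.name = 'pctl'
df.reset_index(inplace=True)

# Percentiles -> standard-normal quantiles, as in the snippet.
df['unorm'] = stats.norm.ppf(df['pctl'].astype(float) / 100., 0, 1.0)

# The default RangeIndex yields a dimension named 'index'.
ds = xr.Dataset.from_dataframe(df)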
github jaws / jaws / jaws / gcnet2nc.py View on GitHub
def gcnet2nc(args, input_file, output_file, stations):
    """Main function to convert GCNet ascii file to netCDF"""
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)
    station_number = df['station_number'][0]
    df.drop('station_number', axis=1, inplace=True)

    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    # surface_temp = extrapolate_temp(df)

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    month, day, hour, minutes, time, time_bounds, sza, az, first_date, last_date = get_time_and_sza(
        args, df, longitude, latitude)

    common.log(args, 4, 'Calculating quality control variables')
    fill_dataset_quality_control(df, ds, input_file)

    if args.flx:
        common.log(args, 5, 'Calculating Sensible and Latent Heat Fluxes')
github rustychris / stompy / stompy / io / local / usgs_sfbay.py View on GitHub
days_per_request=days_per_request)
    
    #hier=polaris.set_index(['Date','Station Number','Depth'])
    # there were 10 rows, 2017-04-04, stations 35 and 36, with duplicate
    # entries. Like they measured the same location, same day, 1 hour apart.
    hier=polaris.groupby(['Date','Station Number','Depth']).first()
    if len(hier) != len(polaris):
        logging.warning("After grouping by date, station and depth, there were some duplicates.")

    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        ds=xr.Dataset.from_dataframe(hier)

    ds=ds.rename({'Station Number':'station','Depth':'depth','Date':'cruise'})
    ds['date']=ds['cruise']

    ds=ds.set_coords(['Julian Date','Days since 1/1/1990','Decimal Date','time',
                      'Distance from 36','longitude','latitude'])
    
    def agg_field(ds,fld,agg):
        with warnings.catch_warnings():
            # ignore RuntimeWarning due to all-nan slices
            # and FutureWarning for potential NaT!=NaT comparison
            warnings.simplefilter('ignore')
            vmin=ds[fld].min(dim=agg)
            vmax=ds[fld].max(dim=agg)
            # funny comparisons to check for either nan/nat or that they
            # are equal.
            if np.any( (vmin==vmin) & (vmin!=vmax) ):
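
The essential move in the stompy snippet, isolated: grouping on three keys leaves a three-level index, from_dataframe expands each level into a dimension, and rename() maps the raw column names onto friendlier ones. A minimal sketch with fabricated cruise data:

import pandas as pd
import xarray as xr

df = pd.DataFrame({'Date': ['2017-04-04', '2017-04-04'],
                   'Station Number': [35, 36],
                   'Depth': [1.0, 1.0],
                   'salinity': [30.1, 29.8]})

# first() collapses duplicates and leaves a three-level index,
# which from_dataframe expands into three dimensions.
hier = df.groupby(['Date', 'Station Number', 'Depth']).first()
ds = xr.Dataset.from_dataframe(hier)
ds = ds.rename({'Date': 'cruise', 'Station Number': 'station', 'Depth': 'depth'})
print(ds['salinity'].dims)  # ('cruise', 'station', 'depth')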