# AUTOGENERATED! DO NOT EDIT! File to edit: XmlElementTree.ipynb (unless otherwise specified).

__all__ = ['dict2tuple', 'Dict2Data', 'read_asxml', 'xml2dict', 'exclude_kpts', 'get_ispin', 'get_summary',
           'join_ksegments', 'get_kpts', 'get_tdos', 'get_evals', 'get_bands_pro_set', 'get_dos_pro_set',
           'get_structure', 'export_vasprun', 'load_export', 'dump_dict', 'load_from_dump', 'islice2array',
           'slice_data', 'split_vasprun']

# Cell
import re
import os
import json
import pickle
from itertools import islice, chain, product
from collections import namedtuple

import numpy as np
from importlib.machinery import SourceFileLoader
import textwrap
import xml.etree.ElementTree as ET
# Inside packages import to work both with package and jupyter notebook.
try:
    from pivotpy import utils as gu
    from .sio import read_ticks
except:
    import pivotpy.utils as gu
    import pivotpy.sio.read_ticks as read_ticks

# Cell
def dict2tuple(name,d):
    """Builds a namedtuple from a dictionary, converting nested dictionaries recursively. Result supports index access, dot notation and unpacking.
    - **Parameters**
        - name: Type name given to the namedtuple.
        - d   : Dictionary (may be nested). Its keys become the field names, in the dictionary's order.
    - **Returns**
        - namedtuple : Nested dictionaries become namedtuples named after their upper-cased key.
    """
    # Create the tuple type first so invalid field names are reported before any recursion.
    tuple_type = namedtuple(name, d.keys())
    values = []
    for key, value in d.items():
        if isinstance(value, dict):
            value = dict2tuple(key.upper(), value)
        values.append(value)
    return tuple_type(*values)

# Cell
class Dict2Data:
    """
    - Returns a Data object with dictionary keys as attributes of Data accessible by dot notation or by key. Once an attribute is created, it can not be changed from outside.
    - **Parameters**
        - dict : Python dictionary (nested as well) containing any python data types.
    - **Methods**
        - to_dict  : Converts a Data object to dictionary if it could be made a dictionary, otherwise throws relevant error.
        - to_json  : Converts to json str or save to file if `outfile` given. Accepts `indent` as parameter.
        - to_pickle: Converts to bytes str or save to file if `outfile` given.
        - to_tuple : Converts to a named tuple.
        - write    : Prints or saves the `text_plain` attribute if present.
    - **Example**
        > x = Dict2Data({'A':1,'B':{'C':2}})
        > x
        > Data(
        >     A = 1
        >     B = Data(
        >         C = 2
        >         )
        >     )
        > x.B.to_dict()
        > {'C': 2}
    """
    def __init__(self,d):
        if isinstance(d,Dict2Data):
            d = d.to_dict() # if nested Dict2Data objects, must expand here.
        for a,b in d.items():
            if isinstance(b,Dict2Data):
                b = b.to_dict() # expand a Dict2Data value so it is rebuilt below.
            if isinstance(b,(list,tuple)):
                # Lists and tuples are stored as lists; dictionaries inside them are wrapped (one level only).
                setattr(self,a,[Dict2Data(x) if isinstance(x,dict) else x for x in b])
            else:
                setattr(self,a,Dict2Data(b) if isinstance(b,dict) else b)

    def to_dict(self):
        """Converts a `Dict2Data` object (root or nested level) to a dictionary.
        `Dict2Data` items stored inside lists (created by `__init__` from lists/tuples of dictionaries) are converted back to dictionaries as well.
        """
        result = {}
        for k,v in self.__dict__.items():
            if isinstance(v,Dict2Data):
                result[k] = v.to_dict()
            elif isinstance(v,list):
                # Mirror of the wrapping done in __init__, so the round trip returns plain dictionaries.
                result[k] = [x.to_dict() if isinstance(x,Dict2Data) else x for x in v]
            else:
                result[k] = v
        return result

    def to_json(self,outfile=None,indent=1):
        """Dumps a `Dict2Data` object (root or nested level) to json.
        - **Parameters**
            - outfile : Default is None and returns string. If given, writes to file.
            - indent  : Json indent. Default is 1.
        """
        return dump_dict(self,dump_to='json',outfile=outfile,indent=indent)

    def to_pickle(self,outfile=None):
        """Dumps a `Dict2Data` object (root or nested level) to pickle.
        - **Parameters**
            - outfile : Default is None and returns string. If given, writes to file.
        """
        return dump_dict(self,dump_to='pickle',outfile=outfile)

    def to_tuple(self):
        """Creates a namedtuple."""
        return dict2tuple('Data',self.to_dict())

    def write(self,outfile=None):
        """ If data has a non-empty attribute text_plain, writes it to file or stdout.
        - **Parameters**
            - outfile : Default is None and prints to stdout. If given, writes to file.
        - **Raises**
            - AttributeError : If `text_plain` is missing or empty.
        """
        if hasattr(self,'text_plain') and self.text_plain:
            if outfile is None:
                print(self.text_plain)
            else:
                with open(outfile,'w') as f:
                    f.write(self.text_plain)
        else:
            raise AttributeError("No 'text_plain' attribute found.")

    def __repr__(self):
        items= []
        for k,v in self.__dict__.items():
            if type(v) not in (str,float,int,range) and not isinstance(v,Dict2Data):
                # Summarize bulky values instead of printing them in full.
                if isinstance(v,np.ndarray):
                    v = "<{}:shape={}>".format(v.__class__.__name__,np.shape(v))
                elif type(v) in (list,tuple):
                    v = ("<{}:len={}>".format(v.__class__.__name__,len(v)) if len(v) > 10 else v)
                else:
                    v = v.__class__
            if isinstance(v,Dict2Data):
                v = repr(v).replace("\n","\n    ") # indent nested block.
            items.append(f"    {k} = {v}")

        return "Data(\n{}\n)".format('\n'.join(items))

    def __getstate__(self):
        # NOTE(review): returns None, so no instance state is handed to pickle from here.
        # `to_pickle` goes through `dump_dict` instead - confirm this is intended.
        pass  #This is for pickling

    def __setattr__(self, name, value):
        # Attributes are write-once: only names not yet present may be assigned.
        if name in self.__dict__:
            raise AttributeError("Outside assignment is restricted for already present attribute.")
        else:
            self.__dict__[name] = value
    # Dictionary-wise access
    def keys(self):
        return self.__dict__.keys()
    def __getitem__(self,key):
        return self.__dict__[key]
    def items(self):
        return self.__dict__.items()


# Cell
def read_asxml(path = None):
    """
    - Reads a big vasprun.xml file into memory once and then apply commands. If current folder contains `vasprun.xml` file, it automatically picks it.

    - **Parameters**
        - path : Path/To/vasprun.xml. Default None picks './vasprun.xml'.

    - **Returns**
        - xml_data : Root element of parsed xml, to use in other functions.

    - **Raises**
        - FileNotFoundError : If `path` is not an existing file.
        - Exception         : If `path` does not contain 'vasprun.xml'.
    """
    if path is None:
        path = './vasprun.xml'
    if not os.path.isfile(path):
        raise FileNotFoundError("File: '{}' does not exist!".format(path))
    # NOTE(review): this is a substring check although the message says "end with".
    if 'vasprun.xml' not in path:
        raise Exception("File name should end with 'vasprun.xml'")

    # Presumably `fsize` is a string like '<number> <unit>' - verify against gu.get_file_size.
    fsize = gu.get_file_size(path)
    value = float(fsize.split()[0])
    print_str = """
        Memory Consumption Warning!
        ---------------------------
        File: {} is large ({}). It may consume a lot of memory (generally 3 times the file size).
            An alternative way is to parse vasprun.xml is by using `Vasp2Visual` module in Powershell by command `pivotpy.load_export('path/to/vasprun.xml'), which runs underlying powershell functions to load data whith efficient memory managment. It works on Windows/Linux/MacOS if you have powershell core and Vasp2Visual installed on it.
        """.format(path,fsize)
    # Warn above 200 MB or above 1 GB.
    if ('MB' in fsize and value > 200) or ('GB' in fsize and value > 1):
        print(gu.color.y(textwrap.dedent(print_str)))

    return ET.parse(path).getroot() # This is xml_data for other functions

# Cell
def xml2dict(xmlnode_or_filepath):
    """Recursively turns an xml node (or the content of an xml file) into nested dictionaries. All text stays as stripped strings, so converting/splitting values is left to the caller.
    - `xmlnode_or_filepath` is either a path string (loaded through `read_asxml`) or an `xml.etree.ElementTree.Element` object.
    - Every node is returned as a dictionary with keys `tag,text,attr,nodes`, where `nodes` is the list of converted children, so any text is reachable via
    `xml2dict()['nodes'][index]['nodes'][index]...`.
    """
    node = xmlnode_or_filepath
    if isinstance(node, str):
        node = read_asxml(node)

    raw_text = node.text
    children = []
    for child in node:
        children.append(xml2dict(child))
    return {
        'tag': node.tag,
        'text': raw_text.strip() if raw_text else '',
        'attr': node.attrib,
        'nodes': children,
    }

# Cell
def exclude_kpts(xml_data):
    """
    - Returns number of kpoints to exclude used from IBZKPT.
    - Counts the weights that differ from the last weight of the `weights` varray. Returns 0 if no weights are found.
    - **Parameters**
        - xml_data : From `read_asxml` function
    - **Returns**
        - int      : Number of kpoints to exclude.
    """
    weights = [] # stays empty if the file has no weights varray.
    for kpts in xml_data.iter('varray'):
        if kpts.attrib == {'name': 'weights'}:
            weights = [float(arr.text.strip()) for arr in kpts.iter('v')]
    if not weights:
        return 0
    last_weight = weights[-1]
    return sum(1 for weight in weights if weight != last_weight)

# Cell
def get_ispin(xml_data):
    """
    - Returns value of ISPIN, read from the first `i` node whose attributes are exactly type 'int' and name 'ISPIN'.
    - **Parameters**
        - xml_data : From `read_asxml` function
    - **Returns**
        - int      : Value of ISPIN, or None if no such node exists.
    """
    wanted = {'type': 'int', 'name': 'ISPIN'}
    matches = (int(node.text) for node in xml_data.iter('i') if node.attrib == wanted)
    return next(matches, None)

# Cell
def get_summary(xml_data):
    """
    - Returns overview of system parameters.
    - **Parameters**
        - xml_data : From `read_asxml` function
    - **Returns**
        - Data     : pivotpy.Dict2Data with attributes SYSTEM, NION, NELECT, TypeION, ElemName, ElemIndex, E_Fermi, ISPIN, fields and incar, accessible via dot notation.
    """
    for i_car in xml_data.iter('incar'):
        incar = {car.attrib['name']: car.text.strip() for car in i_car}
    n_ions = [int(atom.text) for atom in xml_data.iter('atoms')][0]
    type_ions = [int(atom_types.text) for atom_types in xml_data.iter('types')][0]
    # First cell of every `rc` row. The code treats the last `type_ions` entries as counts
    # per ion type and the entries before them as element names.
    elem = [info[0].text.strip() for info in xml_data.iter('rc')]
    elem_name = list(dict.fromkeys(elem[:-type_ions])) # unique names in order of first appearance.
    elem_index = [0] # start index
    for entry in elem[-type_ions:]:
        elem_index.append(int(entry) + elem_index[-1]) # cumulative counts.
    ISPIN = get_ispin(xml_data=xml_data)
    # `.get` avoids a KeyError on `i` nodes which carry no name attribute.
    NELECT = int([i.text.strip().split('.')[0] for i in xml_data.iter('i') if i.attrib.get('name') == 'NELECT'][0])
    # Fields. Start with an empty list so a file without a `partial` node still works.
    dos_fields = []
    try:
        for pro in xml_data.iter('partial'):
            dos_fields = [field.text.strip() for field in pro.iter('field')]
            dos_fields = [field for field in dos_fields if 'energy' not in field]
    except AttributeError: # a field without text.
        dos_fields = []
    for i in xml_data.iter('i'): #efermi for condition required.
        if i.attrib == {'name': 'efermi'}:
            efermi = float(i.text)
    #Writing information to a dictionary
    info_dic = {'SYSTEM':incar['SYSTEM'],'NION':n_ions,'NELECT':NELECT,'TypeION':type_ions,
                'ElemName':elem_name,'ElemIndex':elem_index,'E_Fermi': efermi,'ISPIN':ISPIN,
                'fields':dos_fields,'incar':incar}
    return Dict2Data(info_dic)

# Cell
def join_ksegments(kpath,kseg_inds=[]):
    """Closes gaps in a broken kpath by shifting each following segment onto the end of the previous one.
    `kseg_inds` should be list of first index of next segment. Returns a list."""
    joined = np.array(kpath)
    for start in (kseg_inds or ()):
        # Distance between the segment's first point and the point just before it.
        gap = joined[start] - joined[start-1]
        joined[start:] -= gap
    return list(joined)

def get_kpts(xml_data, skipk=0,kseg_inds=[]):
    r"""Returns kpoints and the cumulative path length (kpath) along them.

    **Parameters**
    - xml_data: From `read_asxml` function.
    - skipk : (int) Number of initial kpoints to skip.
    - kseg_inds : (list) List of indices of kpoints where path is broken, passed to `join_ksegments`.

    **Returns**
    - Data : pivotpy.Dict2Data with attributes `NKPTS`, `kpoints` and `kpath`.
    """
    for varr in xml_data.iter('varray'):
        if varr.attrib == {'name': 'kpointlist'}:
            kpoints = [[float(comp) for comp in v.text.split()] for v in varr.iter('v')]
    kpoints = np.array(kpoints[skipk:])
    # Accumulate distances between consecutive kpoints, rounded to 6 decimals at every step.
    kpath = [0]
    for prev_pt, next_pt in zip(kpoints[:-1], kpoints[1:]):
        step = np.sqrt(np.sum((prev_pt - next_pt)**2))
        kpath.append(np.round(step + kpath[-1], 6))
    # If broken path, then join points.
    kpath = join_ksegments(kpath, kseg_inds)
    return Dict2Data({'NKPTS': len(kpoints), 'kpoints': kpoints, 'kpath': kpath})

# Cell
def get_tdos(xml_data,spin_set=1,elim=[]):
    """
    - Returns total dos for a spin_set (default 1) and energy limit. If spin-polarized calculations (with spin_set=1), gives SpinUp and SpinDown keys as well.
    - **Parameters**
        - xml_data : From `read_asxml` function
        - spin_set : int, default is 1.
        - elim     : List [min,max] of energy relative to E_Fermi, default empty.
    - **Returns**
        - Data     : pivotpy.Dict2Data with attributes E_Fermi, ISPIN, tdos and, if `elim` is given, grid_range.
    """
    def _rows2array(node):
        # One row per child of `node`, numbers split on whitespace.
        return np.array([[float(val) for val in row.text.split()] for row in node])

    def _window(energies, e_fermi, limits):
        # Index bounds of grid points whose energy relative to e_fermi lies within limits.
        upper = np.max(np.where(energies-e_fermi<=np.max(limits)))+1
        lower = np.min(np.where(energies-e_fermi>=np.min(limits)))
        return lower, upper

    tdos = [] #assign for safely exit if wrong spin set entered.
    ISPIN = get_ispin(xml_data=xml_data)
    requested = {'comment': 'spin {}'.format(spin_set)}
    for dos_node in xml_data.iter('dos'):
        for spin_node in dos_node[1].iter('set'):
            if spin_set != 1: #can get any
                if spin_node.attrib == requested:
                    tdos = _rows2array(spin_node)
            elif ISPIN == 1:
                if spin_node.attrib == {'comment': 'spin 1'}:
                    tdos = _rows2array(spin_node)
            elif ISPIN == 2:
                if spin_node.attrib == {'comment': 'spin 1'}:
                    tdos_1 = _rows2array(spin_node)
                if spin_node.attrib == {'comment': 'spin 2'}:
                    tdos_2 = _rows2array(spin_node)
                    tdos = {'SpinUp': tdos_1, 'SpinDown': tdos_2}
    for node in xml_data.iter('i'): #efermi for condition required.
        if node.attrib == {'name': 'efermi'}:
            efermi = float(node.text)

    dos_dic = {'E_Fermi': efermi, 'ISPIN': ISPIN}
    if not elim: # full grid requested.
        dos_dic['tdos'] = tdos
        return Dict2Data(dos_dic)

    #Filtering in energy range.
    if spin_set != 1 or ISPIN == 1:
        lo_ind, up_ind = _window(tdos[:,0], efermi, elim)
        tdos = tdos[lo_ind:up_ind,:]
    elif ISPIN == 2:
        # Window is computed on SpinUp energies and applied to both spins.
        lo_ind, up_ind = _window(tdos['SpinUp'][:,0], efermi, elim)
        tdos = {'SpinUp': tdos_1[lo_ind:up_ind,:], 'SpinDown': tdos_2[lo_ind:up_ind,:]}
    dos_dic['grid_range'] = range(lo_ind, up_ind)
    dos_dic['tdos'] = tdos
    return Dict2Data(dos_dic)

# Cell
def get_evals(xml_data,skipk=None,elim=[]):
    """
    - Returns eigenvalues as numpy array. If spin-polarized calculations, gives SpinUp and SpinDown keys as well.
    - **Parameters**
        - xml_data : From `read_asxml` function
        - skipk    : Number of initial kpoints to skip. Default None detects it via `exclude_kpts`.
        - elim     : List [min,max] of energy relative to E_Fermi, default empty.
    - **Returns**
        - Data     : pivotpy.Dict2Data with attributes E_Fermi, ISPIN, NBANDS, evals and indices (range of band indices kept).
    """
    def _first_values(spin_node):
        # Rows come from the children of the spin set; every entry contributes the
        # first number of its text. The first `skipk` rows are dropped.
        return np.array([[float(band.text.split()[0]) for band in kpt] for kpt in spin_node])[skipk:]

    evals = [] #assign for safely exit if wrong spin set entered.
    ISPIN = get_ispin(xml_data=xml_data)
    if skipk is None:
        skipk = exclude_kpts(xml_data=xml_data) #that much to skip by default
    for neighbor in xml_data.iter('eigenvalues'):
        for item in neighbor[0].iter('set'):
            if ISPIN == 1:
                if item.attrib == {'comment': 'spin 1'}:
                    evals = _first_values(item)
                    NBANDS = len(evals[0])
            if ISPIN == 2:
                if item.attrib == {'comment': 'spin 1'}:
                    eval_1 = _first_values(item)
                if item.attrib == {'comment': 'spin 2'}:
                    eval_2 = _first_values(item)
                    evals = {'SpinUp': eval_1, 'SpinDown': eval_2}
                    NBANDS = len(eval_1[0])

    for i in xml_data.iter('i'): #efermi for condition required.
        if i.attrib == {'name': 'efermi'}:
            efermi = float(i.text)
    evals_dic = {'E_Fermi':efermi,'ISPIN':ISPIN,'NBANDS':NBANDS,'evals':evals,'indices': range(NBANDS)}
    if elim: #check if elim not empty
        if ISPIN == 1:
            # Column (band) indices having any eigenvalue inside the energy window.
            up_ind = np.max(np.where(evals[:,:]-efermi<=np.max(elim))[1])+1
            lo_ind = np.min(np.where(evals[:,:]-efermi>=np.min(elim))[1])
            evals = evals[:,lo_ind:up_ind]
        if ISPIN == 2:
            # Band window is taken from SpinUp and applied to both spins.
            up_ind = np.max(np.where(eval_1[:,:]-efermi<=np.max(elim))[1])+1
            lo_ind = np.min(np.where(eval_1[:,:]-efermi>=np.min(elim))[1])
            evals = {'SpinUp': eval_1[:,lo_ind:up_ind], 'SpinDown': eval_2[:,lo_ind:up_ind]}
        NBANDS = int(up_ind - lo_ind) #update Bands
        evals_dic['NBANDS'] = NBANDS
        evals_dic['indices'] = range(lo_ind,up_ind)
        evals_dic['evals'] = evals
    return Dict2Data(evals_dic)

# Cell
def get_bands_pro_set(xml_data,
                      spin_set=1,
                      skipk=0,
                      bands_range=None,
                      set_path=None):
    """
    - Returns bands projection of a spin_set(default 1).
    - **Parameters**
        - xml_data    : From `read_asxml` function
        - skipk       : Number of initial kpoints to skip (Default 0).
        - spin_set    : Spin set to get, default is 1.
        - bands_range : If elim used in `get_evals`,that will return bands_range to use here. Only the first and last entries are used and both are included, so range(0,2) will give 2 bands 0,1 but tuple (0,2) will give 3 bands 0,1,2.
        - set_path     : path/to/_set[1,2,3,4].txt, works if `split_vasprun` is used before. If it is an existing file, data is read from it instead of `xml_data`.
    - **Returns**
        - Data     : pivotpy.Dict2Data with attributes `labels` (projection fields) and `pros` (array arranged as (NION,NKPTS,NBANDS,pro_fields)).
    - **Raises**
        - ValueError : If `bands_range` is given but empty.
    """
    if bands_range is not None:
        if list(bands_range) == []:
            raise ValueError("No bands prjections found in given energy range.")
    # Try to read _set.txt first. instance check is important.
    if isinstance(set_path,str) and os.path.isfile(set_path):
        # Header line is expected to hold 'name = NKPTS,NBANDS,NIONS,NORBS'.
        _header = islice2array(set_path,nlines=1,raw=True,exclude=None)
        _shape = [int(v) for v in _header.split('=')[1].strip().split(',')]
        NKPTS, NBANDS, NIONS, NORBS = _shape
        if NORBS == 3:
            fields = ['s','p','d']
        elif NORBS == 9:
            fields = ['s','py','pz','px','dxy','dyz','dz2','dxz','x2-y2']
        else:
            fields = [str(i) for i in range(NORBS)] #s,p,d in indices.
        start = NBANDS*NIONS*skipk # rows belonging to skipped kpoints.
        nlines = None # Read till end.
        if bands_range:
            _b_r = list(bands_range)
            # First line is comment but it is taken out by exclude in islice2array.
            # One start row per (kpoint, band); each band occupies NIONS rows.
            start = [[NIONS*NBANDS*k + NIONS*b for b in _b_r] for k in range(skipk,NKPTS)]
            start = [s for ss in start for s in ss] #flatten
            nlines = NIONS # 1 band has nions
            NBANDS = _b_r[-1]-_b_r[0]+1 # update after start

        NKPTS = NKPTS-skipk # Update after start, and bands_range.
        COUNT = NIONS*NBANDS*NKPTS*NORBS
        data = islice2array(set_path,start=start,nlines=nlines,count=COUNT)
        data = data.reshape((NKPTS,NBANDS,NIONS,NORBS)).transpose([2,0,1,3])
        return Dict2Data({'labels':fields,'pros':data})

    #Collect Projection fields
    fields = []
    for pro in xml_data.iter('projected'):
        for arr in pro.iter('field'):
            if 'eig' not in arr.text and 'occ' not in arr.text:
                fields.append(arr.text.strip())
    NORBS = len(fields)
    #Get NIONS for reshaping data
    NIONS = [int(atom.text) for atom in xml_data.iter('atoms')][0]

    for spin in xml_data.iter('set'):
        if spin.attrib == {'comment': 'spin{}'.format(spin_set)}:
            k_sets = [kp for kp in spin.iter('set') if 'kpoint' in kp.attrib['comment']]
    k_sets = k_sets[skipk:]
    NKPTS = len(k_sets)
    band_sets = []
    for k_s in k_sets:
        b_set = [b for b in k_s.iter('set') if 'band' in b.attrib['comment']]
        if bands_range is None:
            band_sets.extend(b_set)
        else:
            b_r = list(bands_range)
            band_sets.extend(b_set[b_r[0]:b_r[-1]+1])
    NBANDS = int(len(band_sets)/len(k_sets))
    try:
        # Error prone solution but 5 times faster than list comprehension.
        bands_pro = (float(t) for band in band_sets for l in band.iter('r') for t in l.text.split())
        COUNT = NKPTS*NBANDS*NORBS*NIONS # Must be counted for performance.
        data = np.fromiter(bands_pro,dtype=float,count=COUNT)
    except Exception:
        # Alternate slow solution. KeyboardInterrupt/SystemExit are not caught here.
        print("Error using `np.fromiter`.\nFalling back to (slow) list comprehension...",end=' ')
        bands_pro = (l.text for band in band_sets for l in band.iter('r'))
        bands_pro = [[float(t) for t in text.split()] for text in bands_pro]
        data = np.array(bands_pro)
        del bands_pro # Release memory
        print("Done.")

    data = data.reshape((NKPTS,NBANDS,NIONS,NORBS)).transpose((2,0,1,3))
    return Dict2Data({'labels':fields,'pros':data})

# Cell
def get_dos_pro_set(xml_data,spin_set=1,dos_range=None):
    """
    - Returns dos projection of a spin_set(default 1) as numpy array.
    - **Parameters**
        - xml_data    : From `read_asxml` function
        - spin_set    : Spin set to get, default 1.
        - dos_range   : If elim used in `get_tdos`,that will return dos_range to use here. Only its first and last entries are used, both included.
    - **Returns**
        - Data     : pivotpy.Dict2Data with attributes `labels` (fields of the partial dos) and `pros` (array arranged as (NION,e_grid,pro_fields)).
    - **Raises**
        - ValueError : If `dos_range` is given but empty.
    """
    if dos_range is not None:
        if list(dos_range) == []:
            raise ValueError("No DOS prjections found in given energy range.")

    # Same NION value as `get_summary` reports, read directly to avoid parsing the whole summary.
    n_ions = [int(atom.text) for atom in xml_data.iter('atoms')][0]
    for pro in xml_data.iter('partial'):
        dos_fields = [field.text.strip() for field in pro.iter('field')]
        #Collecting projections.
        dos_pro = []; set_pro = [] #set_pro=[] in case spin set does not exists
        for ion in range(n_ions):
            for node in pro.iter('set'):
                if node.attrib == {'comment': 'ion {}'.format(ion+1)}:
                    for spin in node.iter('set'):
                        if spin.attrib == {'comment': 'spin {}'.format(spin_set)}:
                            set_pro = [[float(entry) for entry in r.text.split()] for r in spin.iter('r')]
            dos_pro.append(set_pro)
    final_data = np.array(dos_pro) #shape(NION,e_grid,pro_fields)
    if dos_range is not None: # keep the requested part of the grid only.
        dos_range = list(dos_range)
        min_ind = dos_range[0]
        max_ind = dos_range[-1]+1
        # Copy so the result does not keep the full grid alive.
        final_data = final_data[:,min_ind:max_ind,:].copy()
    return Dict2Data({'labels':dos_fields,'pros':final_data})

# Cell
def _add_text_attr2poscar(poscar_data):
    "Builds POSCAR-style text from `SYSTEM`, `basis`, `unique` and `positions` of poscar_data, stores it as attribute `text_plain` and returns poscar_data."
    scale = np.linalg.norm(poscar_data.basis[0]) # length of first basis vector.
    species = poscar_data.unique.to_dict()

    header = f"{poscar_data.SYSTEM}  # Generated by Pivotpy\n  {scale:<20.14f}\n"
    basis_rows = ["{:>22.16f}{:>22.16f}{:>22.16f}".format(*vec) for vec in poscar_data.basis/scale]
    name_row = "\n  " + '\t'.join(species.keys())
    count_row = "\n  " + '\t'.join([str(len(inds)) for inds in species.values()])
    position_rows = ["{:>21.16f}{:>21.16f}{:>21.16f}".format(*pos) for pos in poscar_data.positions]

    pieces = [header, '\n'.join(basis_rows), name_row, count_row, "\nDirect\n", '\n'.join(position_rows)]
    poscar_data.text_plain = ''.join(pieces)
    return poscar_data

def get_structure(xml_data):
    """
    - Returns structure's volume,basis,positions and rec-basis.
    - **Parameters**
        - xml_data : From `read_asxml` function.
    - **Returns**
        - Data     : pivotpy.Dict2Data with attributes SYSTEM, volume, basis, rec_basis, positions, labels, unique (element name -> range of atom indices) and text_plain.
    """
    # `.get` avoids a KeyError on `i` nodes which carry no name attribute.
    SYSTEM = [i.text for i in xml_data.iter('i') if i.attrib.get('name') == 'SYSTEM'][0]

    for final in xml_data.iter('structure'):
        if final.attrib == {'name': 'finalpos'}:
            for i in final.iter('i'): # last `i` node inside finalpos is kept.
                volume = float(i.text)
            for arr in final.iter('varray'):
                if arr.attrib == {'name': 'basis'}:
                    basis = [[float(a) for a in v.text.split()] for v in arr.iter('v')]
                if arr.attrib == {'name': 'rec_basis'}:
                    rec_basis = [[float(a) for a in v.text.split()] for v in arr.iter('v')]
                if arr.attrib == {'name': 'positions'}:
                    positions = [[float(a) for a in v.text.split()] for v in arr.iter('v')]
    # element labels
    types  = [int(_type.text) for _type in xml_data.iter('types')][0]
    # First cell of every `rc` row. The code treats the last `types` entries as counts
    # per ion type and the entries before them as element names per atom.
    elems  = [info[0].text.strip() for info in xml_data.iter('rc')]
    _inds  = np.array([int(a) for a in elems[-types:]])
    _nums  = [k + 1 for i in _inds for k in range(i)] # running number within each type.
    labels = [f"{e} {i}" for i, e in zip(_nums,elems)]

    INDS = np.cumsum([0,*_inds]).astype(int)
    # Name each type after its first atom, so names stay in file order like INDS.
    # (`np.unique` sorts alphabetically and would pair names with the wrong ranges.)
    unique_d = {elems[INDS[i]]: range(INDS[i],INDS[i+1]) for i in range(types)}

    st_dic = {'SYSTEM':SYSTEM,'volume': volume,'basis': np.array(basis),'rec_basis': np.array(rec_basis),'positions': np.array(positions),
              'labels':labels,'unique': unique_d}
    return _add_text_attr2poscar(Dict2Data(st_dic))

# Cell
def export_vasprun(path        = None,
                   skipk       = None,
                   elim        = [],
                   kseg_inds   = [],
                   shift_kpath = 0,
                   try_pwsh    = True
                   ):
    """
    - Returns a full dictionary of all objects from `vasprun.xml` file. It first try to load the data exported by powershell's `Export-VR(Vasprun)`, which is very fast for large files. It is recommended to export large files in powershell first.
    - **Parameters**
        - path       : Path to `vasprun.xml` file. Default is `'./vasprun.xml'`.
        - skipk      : Default is None. Automatically detects kpoints to skip.
        - elim       : List [min,max] of energy interval. Default is [], covers all bands.
        - kseg_inds : List of indices of kpoints where path is broken.
        - shift_kpath: Default 0. Can be used to merge multiple calculations on single axes side by side.
        - try_pwsh   : Default is True and tries to load data exported by `Vasp2Visual` in Powershell.
    - **Returns**
        - Data : Data accessible via dot notation containing nested Data objects (None if the file could not be read):
            - sys_info  : System Information
            - dim_info  : Contains information about dimensions of returned objects.
            - kpoints   : numpy array of kpoints with excluded IBZKPT points
            - kpath     : 1D list directly accessible for plot.
            - bands     : Data containing bands.
            - tdos      : Data containing total dos.
            - pro_bands : Data containing bands projections.
            - pro_dos   : Data containing dos projections.
            - poscar    : Data containing basis,positions, rec_basis and volume.
    """
    # Try to get files if exported data in PowerShell.
    if try_pwsh:
        req_files = ['Bands.txt','tDOS.txt','pDOS.txt','Projection.txt','SysInfo.py']
        if path and os.path.isfile(path):
            req_files = [os.path.join(
                os.path.dirname(os.path.abspath(path)),f) for f in req_files]
        if all(os.path.isfile(f) for f in req_files):
            print('Loading from PowerShell Exported Data...')
            return load_export(path=(path if path else './vasprun.xml'))

    # Proceed if not files from PWSH
    if path is None:
        path = './vasprun.xml'
    try:
        xml_data = read_asxml(path=path)
    except Exception as err:
        # Best effort: callers still get None, but the reason is no longer hidden.
        print("Could not read '{}': {}".format(path, err))
        return
    base_dir = os.path.split(os.path.abspath(path))[0]
    set_paths = [os.path.join(base_dir,"_set{}.txt".format(i)) for i in (1,2)]
    #First exclude unnecessary kpoints. Includes only same weight points
    if skipk is None:
        skipk = exclude_kpts(xml_data=xml_data) #that much to skip by default
    info_dic = get_summary(xml_data=xml_data) #Reads important information of system.
    #KPOINTS
    kpts = get_kpts(xml_data=xml_data,skipk=skipk,kseg_inds=kseg_inds)
    #EIGENVALS
    eigenvals = get_evals(xml_data=xml_data,skipk=skipk,elim=elim)
    #TDOS
    tot_dos = get_tdos(xml_data=xml_data,spin_set=1,elim=elim)
    #Bands and DOS Projection
    if elim:
        bands_range = eigenvals.indices #indices in range form.
        grid_range = tot_dos.grid_range
    else:
        bands_range = None #projection function will read itself.
        grid_range = None
    if info_dic.ISPIN == 1:
        pro_bands = get_bands_pro_set(xml_data=xml_data,spin_set=1,skipk=skipk,bands_range=bands_range,set_path=set_paths[0])
        pro_dos = get_dos_pro_set(xml_data=xml_data,spin_set=1,dos_range=grid_range)
    if info_dic.ISPIN == 2:
        pro_1 = get_bands_pro_set(xml_data=xml_data,spin_set=1,skipk=skipk,bands_range=bands_range,set_path=set_paths[0])
        pro_2 = get_bands_pro_set(xml_data=xml_data,spin_set=2,skipk=skipk,bands_range=bands_range,set_path=set_paths[1])
        pros = {'SpinUp': pro_1.pros,'SpinDown': pro_2.pros}#accessing spins in dictionary after .pro.
        pro_bands = {'labels':pro_1.labels,'pros': pros}
        pdos_1 = get_dos_pro_set(xml_data=xml_data,spin_set=1,dos_range=grid_range)
        # Spin set 2 for SpinDown (was read from spin set 1, duplicating SpinUp).
        pdos_2 = get_dos_pro_set(xml_data=xml_data,spin_set=2,dos_range=grid_range)
        pdos = {'SpinUp': pdos_1.pros,'SpinDown': pdos_2.pros}#accessing spins in dictionary after .pro.
        pro_dos = {'labels':pdos_1.labels,'pros': pdos}

    #Structure
    poscar = get_structure(xml_data=xml_data)
    poscar = {'SYSTEM':info_dic.SYSTEM,**poscar.to_dict()}
    #Dimensions dictionary.
    dim_dic={'kpoints':'(NKPTS,3)','kpath':'(NKPTS,1)','bands':'⇅(NKPTS,NBANDS)','dos':'⇅(grid_size,3)','pro_dos':'⇅(NION,grid_size,en+pro_fields)','pro_bands':'⇅(NION,NKPTS,NBANDS,pro_fields)'}
    #Writing everything to be accessible via dot notation
    kpath = [k+shift_kpath for k in kpts.kpath]  # shift kpath for side by side calculations.
    full_dic = {'sys_info':info_dic,'dim_info':dim_dic,'kpoints':kpts.kpoints,'kpath':kpath,'bands':eigenvals,
                'tdos':tot_dos,'pro_bands':pro_bands,'pro_dos':pro_dos,'poscar': poscar}
    return Dict2Data(full_dic)

# Cell
def _validate_evr(path_evr=None,**kwargs):
    """Validates data given for plotting functions and returns the exported data.
    - **Parameters**
        - path_evr : Output of `export_vasprun` (returned as is if it has `bands` and `kpath`), a path string to vasprun.xml, or None for './vasprun.xml'.
        - kwargs   : Passed to `export_vasprun` when loading from a path, e.g. skipk, elim, kseg_inds.
    - **Raises**
        - ValueError        : If given Data lacks `bands`/`kpath`, or `path_evr` is neither Data, string nor None.
        - FileNotFoundError : If the path does not point to a file.
    """
    if isinstance(path_evr, Dict2Data):
        # check if data is valid.
        if hasattr(path_evr,'bands') and hasattr(path_evr,'kpath'):
            return path_evr
        raise ValueError('Provide Data is not valid!.')
    elif path_evr is None:
        path_evr = './vasprun.xml'

    if isinstance(path_evr,str):
        if os.path.isfile(path_evr):
            # kwargs -> skipk=skipk,elim=elim,kseg_inds=kseg_inds
            return export_vasprun(path=path_evr,**kwargs)
        else:
            raise FileNotFoundError(f'File {path_evr!r} not found!')
    # Other things are not valid.
    raise ValueError('path_evr must be a path string or output of export_vasprun function.')

# Cell
def load_export(path= './vasprun.xml',
                kseg_inds =[],
                shift_kpath = 0,
                path_to_ps='pwsh',
                skipk = None,
                max_filled = 10,
                max_empty = 10,
                keep_files = True
                ):
    """
    - Returns a full dictionary of all objects from `vasprun.xml` file exported using powershell.
    - **Parameters**
        - path       : Path to `vasprun.xml` file. Default is `'./vasprun.xml'`.
        - skipk      : Default is None. Automatically detects kpoints to skip.
        - path_to_ps : Path to `powershell.exe`. Automatically picks on Windows and Linux if added to PATH.
        - kseg_inds : List of indices of kpoints where path is broken.
        - shift_kpath: Default 0. Can be used to merge multiple calculations side by side.
        - keep_files : Default is True. If False, the exported text files are removed after loading, but only when this call generated them; files that already existed are never deleted.
        - max_filled : Number of filled bands below and including VBM. Default is 10.
        - max_empty  : Number of empty bands above VBM. Default is 10.
    - **Returns**
        - Data : Data accessible via dot notation containing nested Data objects:
            - sys_info  : System Information
            - dim_info  : Contains information about dimensions of returned objects.
            - kpoints   : numpy array of kpoints with excluded IBZKPT points
            - kpath     : 1D numpy array directly accessible for plot.
            - bands     : Data containing bands.
            - tdos      : Data containing total dos.
            - pro_bands : Data containing bands projections.
            - pro_dos   : Data containing dos projections.
            - poscar    : Data containing basis,positions, rec_basis and volume.
    - **Raises**
        - ValueError : If `ISPIN` read from `SysInfo.py` is neither 1 nor 2.
    """
    that_loc, file_name = os.path.split(os.path.abspath(path)) # abspath is important to split.
    required_files = ['Bands.txt','tDOS.txt','pDOS.txt','Projection.txt','SysInfo.py']
    with gu.set_dir(that_loc):
        # Goes there and work. Export only if any of the required files is missing.
        generated = not all(os.path.isfile(_file) for _file in required_files)
        if generated:
            # The relative file checks above rely on the working directory being `that_loc`,
            # so the bare file name is given here; the original relative `path` would be
            # resolved against the wrong directory.
            ps_command = 'Import-Module Vasp2Visual; Export-VR -InputFile {} -MaxFilled {} -MaxEmpty {}'.format(file_name,max_filled,max_empty)
            if skipk is not None:
                ps_command += ' -SkipK {}'.format(skipk)
            gu.ps2std(path_to_ps=path_to_ps,ps_command=ps_command)

        # Enable loading SysInfo.py file as source.
        _vars = SourceFileLoader("SysInfo", "./SysInfo.py").load_module()

        SYSTEM            = _vars.SYSTEM
        NKPTS             = _vars.NKPTS
        NBANDS            = _vars.NBANDS
        TypeION           = _vars.TypeION
        NION              = _vars.NION
        NELECT            = _vars.NELECT
        nField_Projection = _vars.nField_Projection
        E_Fermi           = _vars.E_Fermi
        ISPIN             = _vars.ISPIN
        ElemIndex         = _vars.ElemIndex
        ElemName          = _vars.ElemName
        poscar            = {'SYSTEM': SYSTEM,
                            'volume':_vars.volume,
                            'basis' : np.array(_vars.basis),
                            'rec_basis': np.array(_vars.rec_basis),
                            'positions': np.array(_vars.positions)
                            }
        fields            = _vars.fields
        incar             = _vars.INCAR

        # Elements labels and ranges of ions for each unique element.
        elem_labels, unique_d = [], {}
        for idx, name in enumerate(ElemName):
            ion_range = range(ElemIndex[idx],ElemIndex[idx+1])
            elem_labels.extend(f"{name} {ind - ElemIndex[idx] + 1}" for ind in ion_range)
            unique_d[name] = ion_range
        poscar.update({'labels': elem_labels})
        poscar.update({'unique': unique_d})

        # Load Data
        bands= np.loadtxt('Bands.txt').reshape((-1,NBANDS+4)) #Must be read in 2D even if one row only.
        with open('Bands.txt') as f: # 5th token of header line holds index of first band after one leading character.
            start = int(f.readline().split()[4][1:])
        pro_bands= np.loadtxt('Projection.txt').reshape((-1,NBANDS*nField_Projection))
        pro_dos = np.loadtxt('pDOS.txt')
        dos= np.loadtxt('tDOS.txt')

        # Delete only if this call generated the files and user does not want to keep them.
        if generated and not keep_files:
            for _file in required_files:
                os.remove(_file)
        # Returns back

    # Work now!
    sys_info = {'SYSTEM': SYSTEM,'NION': NION,'NELECT':NELECT,'TypeION': TypeION,'ElemName': ElemName,
                'E_Fermi': E_Fermi,'fields':fields, 'incar': incar,'ElemIndex': ElemIndex,'ISPIN': ISPIN}
    dim_info = {'kpoints': '(NKPTS,3)','kpath': '(NKPTS,1)','bands': '⇅(NKPTS,NBANDS)','dos': '⇅(grid_size,3)',
                'pro_dos': '⇅(NION,grid_size,en+pro_fields)','pro_bands': '⇅(NION,NKPTS,NBANDS,pro_fields)'}

    band_indices = range(start,start+NBANDS)
    if ISPIN == 1:
        kpath   = bands[:,3]
        kpoints = bands[:,:3]
        evals   = bands[:,4:]
        tdos    = dos
        pdos    = pro_dos.reshape(NION,-1,nField_Projection+1)
        pros    = pro_bands.reshape(NION,NKPTS,NBANDS,-1)
    elif ISPIN == 2:
        # Every file stacks spin up data first and spin down data after it.
        kpath   = bands[:NKPTS,3]
        kpoints = bands[:NKPTS,:3]
        evals   = {'SpinUp': bands[:NKPTS,4:],'SpinDown': bands[NKPTS:,4:]}
        # tDOS
        dlen    = int(np.shape(dos)[0]/2)
        tdos    = {'SpinUp': dos[:dlen,:],'SpinDown': dos[dlen:,:]}
        # pDOS
        plen    = int(np.shape(pro_dos)[0]/2)
        pdos    = {'SpinUp': pro_dos[:plen,:].reshape(NION,-1,nField_Projection+1),
                   'SpinDown': pro_dos[plen:,:].reshape(NION,-1,nField_Projection+1)}
        # projections
        pblen   = int(np.shape(pro_bands)[0]/2)
        pros    = {'SpinUp': pro_bands[:pblen,:].reshape(NION,NKPTS,NBANDS,-1),
                   'SpinDown': pro_bands[pblen:,:].reshape(NION,NKPTS,NBANDS,-1)}
    else:
        raise ValueError('ISPIN is expected to be 1 or 2, got {!r}'.format(ISPIN))

    bands_dic = {'E_Fermi': E_Fermi, 'ISPIN': ISPIN, 'NBANDS': NBANDS, 'evals': evals, 'indices': band_indices}
    tdos_dic  = {'E_Fermi': E_Fermi, 'ISPIN': ISPIN,'tdos': tdos}
    pdos_dic  = {'labels': fields,'pros': pdos}
    pro_dic   = {'labels': fields,'pros': pros}
    # If broken path, then join points.
    kpath = join_ksegments(kpath,kseg_inds)
    kpath = [k+shift_kpath for k in kpath.copy()] # Shift kpath
    full_dic = {'sys_info': sys_info,'dim_info': dim_info,'kpoints': kpoints,'kpath':kpath,
                'bands':bands_dic,'tdos':tdos_dic,'pro_bands': pro_dic ,'pro_dos': pdos_dic,
                'poscar':_add_text_attr2poscar(Dict2Data(poscar))}
    return Dict2Data(full_dic)

# Cell
def dump_dict(dict_data = None, dump_to = 'pickle',outfile = None,indent=1):
    """
    - Dump an `export_vasprun` or `load_export`'s `Data` object or any dictionary to json or pickle string/file. It converts `Dict2Data` to dictionary before serializing to json/pickle, so json/pickle.loads() of converted Data would be a simple dictionary, pass that to `Dict2Data` to again make accessible via dot notation.
    - **Parameters**
        - dict_data : Any dictionary/Dict2Data object containing numpy arrays, including `export_vasprun` or `load_export` output.
        - dump_to  : Default is `pickle`, other option is `json`.
        - outfile  : Default is None and returns string/bytes. File name does not require extension, a given extension is replaced by `.pickle`/`.json`.
        - indent   : Default is 1. Only works for json.
    - **Returns**
        - Serialized bytes (pickle) or str (json) if `outfile` is None, otherwise None after writing the file.
    - **Raises**
        - ValueError : If `dump_to` is not 'pickle' or 'json'.
    """
    if dump_to not in ['pickle','json']:
        raise ValueError("`dump_to` expects 'pickle' or 'json', got '{}'".format(dump_to))
    try:
        dict_obj = dict_data.to_dict() # Change Data object to dictionary
    except Exception:
        dict_obj = dict_data # Plain dictionary or anything that can not be converted is dumped as is.

    if outfile is not None:
        # splitext drops only the last extension of the file name, so dots in
        # directories (like './run.v2/data') do not truncate the path.
        outfile = os.path.splitext(outfile)[0] + '.' + dump_to

    if dump_to == 'pickle':
        if outfile is None:
            return pickle.dumps(dict_obj)
        with open(outfile,'wb') as f:
            pickle.dump(dict_obj,f)
    else:
        if outfile is None:
            return json.dumps(dict_obj,cls = gu.EncodeFromNumpy,indent=indent)
        with open(outfile,'w') as f:
            json.dump(dict_obj,f,cls = gu.EncodeFromNumpy,indent=indent)
    return None

# Cell
def load_from_dump(file_or_str,keep_as_dict=False):
    """
    - Loads a json/pickle dumped file or string by auto detecting it.
    - **Parameters**
        - file_or_str : Filename of pickle/json or their string. Bytes are treated as pickle data, a string that is not an existing file is parsed as json. An existing file is read as pickle if its name contains '.pickle' or as json if it contains '.json', otherwise empty data is returned.
        - keep_as_dict: Default is False and returns `Data` object. If True, returns dictionary.
    - **Returns**
        - `Data` object if loaded object is a dictionary and `keep_as_dict` is False, otherwise the loaded object itself.
    > Note: Unpickling can execute arbitrary code, only load pickle files/bytes from sources you trust.
    """
    out = {}
    if isinstance(file_or_str,bytes):
        out = pickle.loads(file_or_str)
    else:
        # Only the path check is guarded (a long json string is not a valid path on some systems),
        # so that real errors while reading a file are not replaced by a confusing json error.
        try:
            is_file = os.path.isfile(file_or_str)
        except (OSError,ValueError,TypeError):
            is_file = False

        if is_file:
            if '.pickle' in file_or_str:
                with open(file_or_str,'rb') as f:
                    out = pickle.load(f)
            elif '.json' in file_or_str:
                with open(file_or_str,'r') as f:
                    out = json.load(f,cls = gu.DecodeToNumpy)
        else:
            out = json.loads(file_or_str,cls = gu.DecodeToNumpy)

    if type(out) is dict and not keep_as_dict:
        return Dict2Data(out)
    return out

# Cell
def islice2array(path_or_islice,dtype=float,delimiter='\s+',
                include=None,exclude='#',raw=False,fix_format = True,
                start=0,nlines=None,count=-1,cols=None,new_shape=None
                ):
    """
    - Reads a sliced array from txt,csv type files and return to array. Also manages if columns lengths are not equal and return 1D array. It is faster than loading  whole file into memory. This single function could be used to parse EIGENVAL, PROCAR, DOCAR and similar files with just a combination of `exclude, include,start,stop,step` arguments.
    - **Parameters**
        - path_or_islice: Path/to/file or `itertools.islice(file_object)`. islice is interesting when you want to read different slices of an opened file and do not want to open it again and again. For reference on how to use it just execute `pivotpy.export_potential??` in a notebook cell or ipython terminal to see how islice is used extensively.
        - dtype: float by default. Data type of output array, it is must have argument.
        - start,nlines: The indices of lines to start reading from and number of lines after start respectively. Only work if `path_or_islice` is a file path. both could be None or int, while start could be a list to read slices from file provided that nlines is int. The spacing between adjacent indices in start should be equal to or greater than nlines as pointer in file do not go back on its own.  These parameters are in output of `slice_data`
        > Note: `start` should count comments if `exclude` is None. You can use `slice_data` function to get a dictionary of `start,nlines, count, cols, new_shape` and unpack in argument instead of thinking too much.
        - count: `np.size(output_array) = nrows x ncols`, if it is known before execution, performance is increased. This parameter is in output of `slice_data`.
        - delimiter:  Default is `\s+`. Could be any kind of delimiter valid in numpy and in the file.
        - cols: List of indices of columns to pick. Useful when reading a file like PROCAR which e.g. has text and numbers inline. This parameter is in output of `slice_data`.
        - include: Default is None and includes everything. String of patterns separated by | to keep, could be a regular expression.
        - exclude: Default is '#' to remove comments. String of patterns separated by | to drop,could be a regular expression.
        - raw    : Default is False, if True, returns list of raw strings. Useful to select `cols`.
        - fix_format: Default is True, it sepearates numbers with poor formatting like 1.000-2.000 to 1.000 2.000 which is useful in PROCAR. Keep it False if want to read string literally.
        - new_shape : Tuple of shape Default is None. Will try to reshape in this shape, if fails fallbacks to 2D or 1D. This parameter is in output of `slice_data`.
    - **Examples**
        > `islice2array('path/to/PROCAR',start=3,include='k-point',cols=[3,4,5])[:2]`
        > array([[ 0.125,  0.125,  0.125],
        >        [ 0.375,  0.125,  0.125]])
        > `islice2array('path/to/EIGENVAL',start=7,exclude='E',cols=[1,2])[:2]`
        > array([[-11.476913,   1.      ],
        >        [  0.283532,   1.      ]])
    > Note: Slicing a dimension to 100% of its data is faster than let say 80% for inner dimensions, so if you have to slice more than 50% of an inner dimension, then just load full data and slice after it.
    """
    if nlines is None and isinstance(start,(list,np.ndarray)):
        print("`nlines = None` with `start = array/list` is useless combination.")
        return np.array([]) # return empty array.

    def _fixing(_islice,include=include, exclude=exclude,fix_format=fix_format,nlines=nlines,start=start):
        if include:
            _islice = (l for l in _islice if re.search(include,l))

        if exclude:
            _islice = (l for l in _islice if not re.search(exclude,l))

        # Make slices here after comment excluding.
        if isinstance(nlines,int) and isinstance(start,(list,np.ndarray)):
            #As islice moves the pointer as it reads, start[1:]-nlines-1
            # This confirms spacing between two indices in start >= nlines
            start = [start[0],*[s2-s1-nlines for s1,s2 in zip(start,start[1:])]]
            _islice = chain(*(islice(_islice,s,s+nlines) for s in start))
        elif isinstance(nlines,int) and isinstance(start,int):
            _islice = islice(_islice,start,start+nlines)
        elif nlines is None and isinstance(start,int):
            _islice = islice(_islice,start,None)

        # Negative connected digits to avoid, especially in PROCAR
        if fix_format:
            _islice = (re.sub(r"(\d)-(\d)",r"\1 -\2",l) for l in _islice)
        return _islice

    def _gen(_islice,cols=cols):
        for line in _islice:
            line = line.strip().replace(delimiter,'  ').split()
            if line and cols is not None: # if is must here.
                line = [line[i] for i in cols]
            for chars in line:
                yield dtype(chars)

    #Process Now
    if isinstance(path_or_islice,str) and os.path.isfile(path_or_islice):
        with open(path_or_islice,'r') as f:
            _islice = islice(f,0,None) # Read full, Will fix later.
            _islice = _fixing(_islice)
            if raw:
                return ''.join(_islice)
            # Must to consume islice when file is open
            data = np.fromiter(_gen(_islice),dtype=dtype,count=count)
    else:
        _islice = _fixing(path_or_islice)
        if raw:
            return ''.join(_islice)
        data = np.fromiter(_gen(_islice),dtype=dtype,count=count)

    if new_shape:
        try: data = data.reshape(new_shape)
        except: pass
    elif cols: #Otherwise single array.
        try: data = data.reshape((-1,len(cols)))
        except: pass
    return data

# Cell
def slice_data(dim_inds,old_shape):
    """
    - Returns a dictionary that can be unpacked in arguments of `islice2array` function. This function works only for regular txt/csv/tsv data files which have rectangular data written.
    - **Parameters**
        - dim_inds : List of indices array or range to pick from each dimension. Inner dimensions are more towards right. Last item in dim_inds is considered to be columns. If you want to include all values in a dimension, you can put -1 in that dimension. Note that negative indexing does not work in file reading, -1 is a special case to fetch all items.
        - old_shape: Shape of data set including the columns length in right most place.
    - **Returns**
        - Dictionary with keys:
            - start    : Array of (zero based) line indices where each chunk of lines starts.
            - nlines   : Number of consecutive lines in each chunk.
            - count    : Total number of items to read, i.e. `nlines x len(start) x ncols`, where ncols is number of picked columns.
            - cols     : List of picked columns or None to pick all.
            - new_shape: Shape of picked data, last item is number of picked columns.
    - **Example**
        - You have data as 3D array where third dimension is along column.
        > 0 0
        > 0 2
        > 1 0
        > 1 2
        - To pick [[0,2], [1,2]], you need to give
        > slice_data(dim_inds = [[0,1],[1],-1], old_shape=(2,2,2))
        > {'start': array([1, 3]), 'nlines': 1, 'count': 4, 'cols': None, 'new_shape': (2, 1, 2)}
        - Unpack above dictionary in `islice2array` and you will get output array.
    - Note that dimensions are packed from right to left, like 0,2 is repeating in 2nd column.
    """
    def _pick_all(ind):
        "Only a scalar -1 means all items, lists/ranges/arrays are indices."
        return np.ndim(ind) == 0 and ind == -1

    # Columns are treated differently.
    *row_inds, col_inds = dim_inds
    cols = None if _pick_all(col_inds) else list(col_inds)
    ncols = old_shape[-1] if cols is None else len(cols)

    r_shape = tuple(old_shape[:-1])
    row_inds = [range(r_shape[i]) if _pick_all(ind) else ind for i,ind in enumerate(row_inds)]

    # Line index of an item is dot product of its indices with sizes of inner dimensions.
    #start = [[NIONS*NBANDS*k + NIONS*b for b in _b_r] for k in range(skipk,NKPTS)] #kind of thing.
    _mult_ = [int(np.prod(r_shape[i+1:])) for i in range(len(r_shape))]
    _out_ = np.array([np.dot(p,_mult_) for p in product(*row_inds)]).astype(int)

    # check if innermost dimensions could be chunked. Outermost dimension is left as is.
    nlines = 1
    for i in range(-1,-len(row_inds),-1):
        _inds = np.asarray(row_inds[i]) #innermost first
        if _inds.size == 0 or not np.all(np.diff(_inds) == 1):
            break # Lines of this dimension are not adjacent, so outer dimensions can't be chunked too.
        step = len(_inds)
        _out_ = _out_[::step] # Pick first indices
        nlines = step*nlines
        # Now check if all indices picked then make chunks in outer dimensions too.
        if step != r_shape[i]: # Can't make chunk of outer dimension if inner is not 100% picked.
            break # Stop more chunking

    new_shape = tuple(len(inds) for inds in row_inds) + (ncols,)
    return {'start':_out_,'nlines':nlines,'count': int(nlines*len(_out_)*ncols),'cols':cols,'new_shape':new_shape}

# Cell
def split_vasprun(path=None):
    """
    - Splits a given vasprun.xml file into a smaller _vasprun.xml file plus _set[1,2,3,4].txt files which contain projected data for each spin set. All files are written next to the given file.
    - **Parameters**
        - path: path/to/vasprun.xml file. Default is None which falls back to './vasprun.xml'.
    - **Output**
        - _vasprun.xml file without the lines from the second match of `projected|/eigenvalues` up to and including the last match (presumably the block of projected data).
        - _set1.txt for projected data of colinear calculation.
        - _set1.txt for spin up data and _set2.txt for spin down data in spin-polarized case.
        - _set[1,2,3,4].txt for each spin set of non-colinear calculations.
    - **Raises**
        - FileNotFoundError: If the file at `path` does not exist.
    > Note(review): a file with fewer than two lines matching `projected|/eigenvalues` fails with IndexError at `indices[1]`, and only four output names are prepared in `out_sets` - confirm the input always has projections and at most four spin sets.
    """
    if not path:
        path = './vasprun.xml'
    if not os.path.isfile(path):
        raise FileNotFoundError("{!r} does not exist!".format(path))
    base_dir = os.path.split(os.path.abspath(path))[0]
    out_file = os.path.join(base_dir,'_vasprun.xml')
    out_sets = [os.path.join(base_dir,'_set{}.txt'.format(i)) for i in range(1,5)]
    # process
    with open(path,'r') as f:
        lines = islice(f,None)
        # Line numbers (zero based) of all lines matching the markers, collected in a single pass.
        indices = [i for i,l in enumerate(lines) if re.search('projected|/eigenvalues',l)]
        f.seek(0)
        print("Writing {!r} ...".format(out_file),end=' ')
        with open(out_file,'w') as outf:
            # Everything before the second marker line.
            outf.write(''.join(islice(f,0,indices[1])))
            f.seek(0)
            # Everything after the last marker line.
            outf.write(''.join(islice(f,indices[-1]+1,None)))
            print('Done')

        f.seek(0)
        middle = islice(f,indices[-2]+1,indices[-1]) #projected words excluded
        # Positions of lines containing 'spin', relative to the start of `middle`.
        spin_inds = [i for i,l in enumerate(middle) if re.search('spin',l)][1:] #first useless.
        if len(spin_inds)>1:
            set_length = spin_inds[1]-spin_inds[0] # Must define
        else:
            set_length = indices[-1]-indices[-2] #It is technically more than set length, but fine for 1 set
        f.seek(0) # Must be at zero
        N_sets = len(spin_inds)
        # Let's read shape from out_file as well.
        xml_data = read_asxml(out_file)
        _summary = get_summary(xml_data)
        NIONS  = _summary.NION
        NORBS  = len(_summary.fields)
        NBANDS = get_evals(xml_data).NBANDS
        NKPTS  = get_kpts(xml_data).NKPTS
        del xml_data # free memory now.
        for i in range(N_sets): #Reads every set
            print("Writing {!r} ...".format(out_sets[i]),end=' ')
            # islice counts from the current position of file pointer: first set is located from
            # start of file, every next set begins where the previous read stopped.
            start = (indices[-2]+1+spin_inds[0] if i==0 else 0) # pointer is there next time.
            stop_ = start + set_length # Should move up to set length only.
            with open(out_sets[i],'w') as setf:
                # Header line records the set number and the shape read from out_file.
                setf.write("  # Set: {} Shape: (NKPTS[NBANDS[NIONS]],NORBS) = {},{},{},{}\n".format(i+1,NKPTS,NBANDS,NIONS,NORBS))
                middle = islice(f,start,stop_)
                # Only lines with '</r>' are kept; leading spaces, '/' and '<r>' are stripped from them.
                setf.write(''.join(l.lstrip().replace('/','').replace('<r>','') for l in middle if '</r>' in l))
                print('Done')