Source code for elec

Collect codes of British railway overhead electrification installations.

import copy
import itertools
import os
import re
import socket
import urllib.error
import urllib.parse

import bs4
import pandas as pd
import requests
from pyhelpers.dir import cd, validate_input_data_dir
from pyhelpers.ops import confirmed, fake_requests_headers
from pyhelpers.store import load_pickle, save_pickle

from pyrcs.utils import cd_dat, get_catalogue, get_last_updated_date, homepage_url, \
    parse_tr, print_conn_err, is_internet_connected, print_connection_error

class Electrification:
    """
    A class for collecting section codes for overhead line electrification
    (OLE) installations.

    Each ``collect_*`` method scrapes one page listed in the catalogue and
    pickles the result into the package data directory; each ``fetch_*``
    method prefers that local backup and only re-collects when asked to
    (or when no backup exists).

    :param data_dir: name of data directory, defaults to ``None``
    :type data_dir: str or None
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``True``
    :type verbose: bool or int

    **Example**::

        >>> from pyrcs.line_data import Electrification

        >>> elec = Electrification()

        >>> print(elec.Name)
        Electrification masts and related features
    """

    def __init__(self, data_dir=None, update=False, verbose=True):
        """
        Constructor method.
        """
        if not is_internet_connected():
            print_connection_error(verbose=verbose)

        #: Name of data category
        self.Name = 'Electrification masts and related features'
        #: Key of the dict-type data
        self.Key = 'Electrification'

        #: URL to the homepage
        self.HomeURL = homepage_url()
        #: URL to the source web page
        self.SourceURL = urllib.parse.urljoin(
            self.HomeURL, '/electrification/mast_prefix0.shtm')

        #: Key to last updated date
        self.LUDKey = 'Last updated date'
        #: Last updated date of the source page
        self.Date = get_last_updated_date(
            url=self.SourceURL, parsed=True, as_date_type=False)

        #: Catalogue of the data (maps a data key to the URL of its page)
        self.Catalogue = get_catalogue(
            page_url=self.SourceURL, update=update, confirmation_required=False)

        #: Directory where the package backup data is kept
        if data_dir:
            self.DataDir = validate_input_data_dir(data_dir)
        else:
            self.DataDir = cd_dat("line-data", self.Key.lower())
        #: Directory most recently used for saving data
        self.CurrentDataDir = copy.copy(self.DataDir)

        # Data keys and the matching pickle file names (without extension)
        self.NationalNetworkKey = 'National network'
        self.NationalNetworkPickle = self.NationalNetworkKey.lower().replace(" ", "-")
        self.IndependentLinesKey = 'Independent lines'
        self.IndependentLinesPickle = self.IndependentLinesKey.lower().replace(" ", "-")
        self.OhnsKey = 'National network neutral sections'
        self.OhnsPickle = self.OhnsKey.lower().replace(" ", "-")
        self.TariffZonesKey = 'National network energy tariff zones'
        self.TariffZonesPickle = self.TariffZonesKey.lower().replace(" ", "-")

    def _cdd_elec(self, *sub_dir, **kwargs):
        """
        Change directory to package data directory and sub-directories (and/or a file).

        The directory for this module: ``"\\dat\\line-data\\electrification"``.

        :param sub_dir: sub-directory or sub-directories (and/or a file)
        :type sub_dir: str
        :param kwargs: optional parameters of ``os.makedirs``, e.g. ``mode=0o777``
        :return: path to the backup data directory for ``Electrification``
        :rtype: str

        :meta private:
        """
        return cd(self.DataDir, *sub_dir, mkdir=True, **kwargs)

    # -- Shared private helpers --------------------------------------------------------

    @staticmethod
    def _nested_verbose(verbose, data_dir=None):
        """
        Work out the verbosity passed from a ``fetch_*`` method to its collector.

        The collector is silenced when the caller is quiet or when a custom
        ``data_dir`` is given; level ``2`` is passed through unchanged and any
        other truthy value becomes ``True``.

        :meta private:
        """
        if data_dir or not verbose:
            return False
        return 2 if verbose == 2 else True

    @staticmethod
    def _tidy_cell(text, strip=True):
        """
        Clean one table cell: turn ``(['`` / ``'])`` wrappers into plain
        brackets and drop escaped non-breaking spaces.

        :meta private:
        """
        text = re.sub(r'\(?\[\'', '[', text)
        text = re.sub(r'\']\)?', ']', text)
        text = text.replace('\\xa0', '')
        return text.strip() if strip else text

    @classmethod
    def _tidy_table(cls, table, strip=True):
        """
        Apply :meth:`_tidy_cell` to every cell of a data frame.

        ``DataFrame.applymap`` is deprecated since pandas 2.1 in favour of
        ``DataFrame.map``. The class attribute is checked (not the instance)
        so that a column literally named ``map`` cannot shadow the method.

        :meta private:
        """
        def tidy(cell):
            return cls._tidy_cell(cell, strip=strip)

        if hasattr(pd.DataFrame, 'map'):
            return table.map(tidy)
        return table.applymap(tidy)

    @classmethod
    def _read_codes_table(cls, header_tag, strip=True):
        """
        Build a data frame from a header ``<table>`` and the ``<table>`` following it.

        :meta private:
        """
        header = [th.text for th in header_tag.find_all('th')]
        rows = parse_tr(header, header_tag.find_next('table').find_all('tr'))
        return cls._tidy_table(pd.DataFrame(rows, columns=header), strip=strip)

    @staticmethod
    def _read_footnotes(ol_tag):
        """
        Map the (title-cased) anchor id of each item of an ``<ol>`` to its text.

        :meta private:
        """
        return {
            item.a.get('id').title(): item.get_text(strip=True).replace('\xa0', '')
            for item in ol_tag if item != '\n'}

    @staticmethod
    def _read_page(url):
        """
        Download a page and return its text, always releasing the connection.

        :meta private:
        """
        response = requests.get(url, headers=fake_requests_headers())
        try:
            return response.text
        finally:
            response.close()

    @staticmethod
    def _merge_fetched(key, lud_key, fetched):
        """
        Merge the results of several ``fetch_*`` calls into one dictionary.

        Missing results (``None`` / empty) are skipped; the overall last
        updated date is the latest of the available ones, or ``None``.

        :meta private:
        """
        available = [item for item in fetched if item]

        data = {}
        for item in available:
            # Each result holds exactly one data key besides the date key
            data_key = next((k for k in item if k != lud_key), None)
            if data_key is not None:
                data[data_key] = item[data_key]

        dates = [item[lud_key] for item in available if item.get(lud_key) is not None]

        return {key: data, lud_key: max(dates) if dates else None}

    def _collect_page_codes(self, key, pickle_name, parse_sections,
                            confirmation_required=True, verbose=False):
        """
        Shared skeleton of the HTML-based collectors: confirm, download,
        parse with ``parse_sections(soup)``, then pickle the result.

        :meta private:
        """
        if not confirmed("To collect section codes for OLE installations: {}?".format(
                key.lower()), confirmation_required=confirmation_required):
            return None

        if verbose == 2:
            print("Collecting the codes for {}".format(key.lower()), end=" ... ")

        url = self.Catalogue[key]

        try:
            page_text = self._read_page(url)
        except requests.exceptions.ConnectionError:
            if verbose == 2:
                print("Failed. ")
            print_conn_err(verbose=verbose)
            return None

        codes = None
        try:
            soup = bs4.BeautifulSoup(page_text, 'lxml')
            sections = parse_sections(soup)

            codes = {key: sections, self.LUDKey: get_last_updated_date(url)}

            if verbose == 2:
                print("Done. ")

            path_to_pickle = self._cdd_elec(pickle_name + ".pickle")
            save_pickle(codes, path_to_pickle, verbose=verbose)

        except Exception as e:
            # Best effort: a change of page layout must not crash the caller
            print("Failed. {}".format(e))

        return codes

    def _fetch_codes(self, key, pickle_name, collect, update=False, pickle_it=False,
                     data_dir=None, verbose=False,
                     no_data_msg="No data of {} has been freshly collected."):
        """
        Shared skeleton of the ``fetch_*`` methods: load the local backup
        unless an update is requested (or no backup exists), in which case
        ``collect`` is called.

        :return: the data, or ``None`` if nothing could be collected and no
            local backup exists
        :rtype: dict or None

        :meta private:
        """
        pickle_filename = pickle_name + ".pickle"
        path_to_pickle = self._cdd_elec(pickle_filename)

        if os.path.isfile(path_to_pickle) and not update:
            return load_pickle(path_to_pickle)

        codes = collect(confirmation_required=False,
                        verbose=self._nested_verbose(verbose, data_dir))

        if codes:
            if pickle_it and data_dir:
                self.CurrentDataDir = validate_input_data_dir(data_dir)
                save_pickle(codes, os.path.join(self.CurrentDataDir, pickle_filename),
                            verbose=verbose)
            return codes

        print(no_data_msg.format(key.lower()))
        # Fall back on the backup only if there actually is one
        if os.path.isfile(path_to_pickle):
            return load_pickle(path_to_pickle)
        return None

    # -- National network --------------------------------------------------------------

    @classmethod
    def _parse_national_network(cls, soup):
        """
        Parse every ``<h3>`` section of the national network page.

        :meta private:
        """
        sections = {}

        h3 = soup.find('h3')
        while h3 is not None:
            header_tag = h3.find_next('table')
            if header_tag is not None:
                table = cls._read_codes_table(header_tag, strip=False)
            else:
                table = pd.DataFrame([li.text for li in h3.find_all_next('li')],
                                     columns=['Unknown_codes'])

            notes = None
            sibling = h3.find_next_sibling()
            if sibling is not None and sibling.name == 'p':
                next_p = h3.find_next('p')
                if next_p.find_previous('h3') == h3:
                    notes = next_p.text.replace('\xa0', '')

            h4 = h3.find_next('h4')
            if h4 is not None and h4.text == 'Notes':
                # NOTE(review): this reads the first <ol> of the whole page for
                # every section, whereas the independent-lines parser uses the
                # <ol> following the <h4> -- confirm which one is intended.
                footnotes = cls._read_footnotes(soup.find('ol'))
                notes = footnotes if notes is None else [notes, footnotes]

            sections[h3.text.strip()] = {'Codes': table, 'Notes': notes}

            h3 = h3.find_next_sibling('h3')

        return sections

    def collect_national_network_codes(self, confirmation_required=True, verbose=False):
        """
        Collect OLE section codes for national network from source web page.

        :param confirmation_required: whether to require users to confirm and proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OLE section codes for National network
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> nn_dat = elec.collect_national_network_codes(confirmation_required=False)

            >>> print(list(nn_dat.keys()))
            ['National network', 'Last updated date']
        """
        return self._collect_page_codes(
            self.NationalNetworkKey, self.NationalNetworkPickle,
            self._parse_national_network,
            confirmation_required=confirmation_required, verbose=verbose)

    def fetch_national_network_codes(self, update=False, pickle_it=False, data_dir=None,
                                     verbose=False):
        """
        Fetch OLE section codes for national network from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data
            with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str or None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OLE section codes for National network
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> nn_ole_dat = elec.fetch_national_network_codes()

            >>> print(list(nn_ole_dat.keys()))
            ['National network', 'Last updated date']
        """
        return self._fetch_codes(
            self.NationalNetworkKey, self.NationalNetworkPickle,
            self.collect_national_network_codes,
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose)

    # -- Independent lines -------------------------------------------------------------

    def get_indep_line_names(self, verbose=False):
        """
        Get names of independent lines.

        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool
        :return: a list of independent line names, or ``None`` if the page
            could not be reached or has no "Jump to:" paragraph
        :rtype: list or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> l_names = elec.get_indep_line_names()

            >>> print(l_names[:3])
            ['Beamish Tramway', 'Birkenhead Tramway', 'Black Country Living Museum']
        """
        try:
            page_text = self._read_page(self.Catalogue[self.IndependentLinesKey])
        except requests.exceptions.ConnectionError:
            print_conn_err(verbose=verbose)
            return None

        soup = bs4.BeautifulSoup(page_text, 'lxml')
        for p in soup.find_all('p'):
            if re.match(r'^Jump to: ', p.text):
                return p.text.replace('Jump to: ', '').split('\xa0| ')

        return None

    @classmethod
    def _parse_indep_lines(cls, soup):
        """
        Parse every ``<h3>`` section of the independent lines page.

        :meta private:
        """
        sections = {}

        h3 = soup.find('h3')
        while h3 is not None:
            # Only use a table / heading / paragraph that belongs to this section
            table = None
            header_tag = h3.find_next('table')
            if header_tag is not None and header_tag.find_previous('h3') == h3:
                table = cls._read_codes_table(header_tag)

            notes = {'Notes': None}

            h4 = h3.find_next('h4')
            if h4 is not None and h4.find_previous('h3') == h3 and h4.text == 'Notes':
                notes['Notes'] = cls._read_footnotes(h4.find_next('ol'))

            note_tag = h3.find_next('p')
            if note_tag is not None:
                if note_tag.find_previous('h3') == h3:
                    note_txt = note_tag.text.replace('\xa0', '')
                    if notes['Notes'] is None:
                        notes['Notes'] = note_txt
                    else:
                        notes['Notes'] = [notes['Notes'], note_txt]

                ex_note_tag = note_tag.find_next('ol')
                if ex_note_tag is not None and ex_note_tag.find_previous('h3') == h3:
                    known_codes = pd.DataFrame(
                        [re.sub(r'[()]', '', li.text).split(' ', 1)
                         for li in ex_note_tag.find_all('li')],
                        columns=['Initial', 'Code'])
                    notes['Section codes known at present'] = known_codes

            sections[h3.text] = {'Codes': table, **notes}

            h3 = h3.find_next_sibling('h3')

        return sections

    def collect_indep_lines_codes(self, confirmation_required=True, verbose=False):
        """
        Collect OLE section codes for independent lines from source web page.

        :param confirmation_required: whether to require users to confirm and proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OLE section codes for independent lines
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> il_ole_dat = elec.collect_indep_lines_codes(confirmation_required=False)

            >>> print(list(il_ole_dat.keys()))
            ['Independent lines', 'Last updated date']
        """
        return self._collect_page_codes(
            self.IndependentLinesKey, self.IndependentLinesPickle,
            self._parse_indep_lines,
            confirmation_required=confirmation_required, verbose=verbose)

    def fetch_indep_lines_codes(self, update=False, pickle_it=False, data_dir=None,
                                verbose=False):
        """
        Fetch OLE section codes for independent lines from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data
            with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str or None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OLE section codes for independent lines
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> il_ole_dat = elec.fetch_indep_lines_codes()

            >>> print(list(il_ole_dat.keys()))
            ['Independent lines', 'Last updated date']
        """
        return self._fetch_codes(
            self.IndependentLinesKey, self.IndependentLinesPickle,
            self.collect_indep_lines_codes,
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose)

    # -- Neutral sections (OHNS) -------------------------------------------------------

    def collect_ohns_codes(self, confirmation_required=True, verbose=False):
        """
        Collect codes for overhead line electrification neutral sections (OHNS)
        from source web page.

        :param confirmation_required: whether to require users to confirm and proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OHNS codes
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> ohns_dat = elec.collect_ohns_codes(confirmation_required=False)

            >>> print(list(ohns_dat.keys()))
            ['National network neutral sections', 'Last updated date']
        """
        if not confirmed("To collect section codes for OLE installations: {}?".format(
                self.OhnsKey.lower()), confirmation_required=confirmation_required):
            return None

        if verbose == 2:
            print("Collecting data of {}".format(self.OhnsKey.lower()), end=" ... ")

        url = self.Catalogue[self.OhnsKey]

        try:
            tables = pd.read_html(url)
        except (urllib.error.URLError, socket.gaierror):
            if verbose == 2:
                print("Failed. ")
            print_conn_err(verbose=verbose)
            return None

        ohns_codes = None
        try:
            # The page is expected to hold a header table and a data table;
            # unpacking here reports any other layout as a failure, not a crash
            header, neutral_sections_data = tables
            neutral_sections_data.columns = header.columns.to_list()
            neutral_sections_data.fillna('', inplace=True)

            last_up_date = get_last_updated_date(url)

            if verbose == 2:
                print("Done. ")

            ohns_codes = {self.OhnsKey: neutral_sections_data,
                          self.LUDKey: last_up_date}

            path_to_pickle = self._cdd_elec(self.OhnsPickle + ".pickle")
            save_pickle(ohns_codes, path_to_pickle, verbose=verbose)

        except Exception as e:
            print("Failed. {}".format(e))

        return ohns_codes

    def fetch_ohns_codes(self, update=False, pickle_it=False, data_dir=None,
                         verbose=False):
        """
        Fetch codes for overhead line electrification neutral sections (OHNS)
        from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data
            with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str or None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OHNS codes
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> ohns_dat = elec.fetch_ohns_codes()

            >>> print(list(ohns_dat.keys()))
            ['National network neutral sections', 'Last updated date']
        """
        return self._fetch_codes(
            self.OhnsKey, self.OhnsPickle, self.collect_ohns_codes,
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose,
            no_data_msg="No data of section codes for {} has been freshly collected.")

    # -- Energy tariff zones -----------------------------------------------------------

    @classmethod
    def _parse_tariff_zones(cls, soup):
        """
        Parse every ``<h3>`` section of the energy tariff zones page.

        :meta private:
        """
        zones = {}

        h3 = soup.find('h3')
        while h3 is not None:
            table = None
            header_tag = h3.find_next('table')
            if header_tag is not None and header_tag.find_previous('h3') == h3:
                table = cls._read_codes_table(header_tag)

            # Gather all paragraphs up to the next <h3>
            paragraphs = []
            next_p = h3.find_next('p')
            while next_p is not None and next_p.find_previous('h3') == h3:
                paragraphs.append(next_p.text.replace('\xa0', ''))
                next_p = next_p.find_next('p')

            # NOTE(review): 'Notes' is a single shared key, so each section
            # overwrites the notes of the previous one; kept as is because
            # changing it would alter the shape of the returned data.
            zones.update({h3.text: table, 'Notes': ' '.join(paragraphs).strip()})

            h3 = h3.find_next_sibling('h3')

        return zones

    def collect_etz_codes(self, confirmation_required=True, verbose=False):
        """
        Collect OLE section codes for national network energy tariff zones
        from source web page.

        :param confirmation_required: whether to require users to confirm and proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OLE section codes for national network energy tariff zones
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> etz_ole_dat = elec.collect_etz_codes(confirmation_required=False)

            >>> print(list(etz_ole_dat.keys()))
            ['National network energy tariff zones', 'Last updated date']
        """
        return self._collect_page_codes(
            self.TariffZonesKey, self.TariffZonesPickle, self._parse_tariff_zones,
            confirmation_required=confirmation_required, verbose=verbose)

    def fetch_etz_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch OLE section codes for national network energy tariff zones
        from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data
            with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str or None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: OLE section codes for national network energy tariff zones
        :rtype: dict or None

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> etz_ole_dat = elec.fetch_etz_codes()

            >>> print(list(etz_ole_dat.keys()))
            ['National network energy tariff zones', 'Last updated date']
        """
        return self._fetch_codes(
            self.TariffZonesKey, self.TariffZonesPickle, self.collect_etz_codes,
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose)

    # -- Everything --------------------------------------------------------------------

    def fetch_elec_codes(self, update=False, pickle_it=False, data_dir=None,
                         verbose=False):
        """
        Fetch OLE section codes in the electrification catalogue.

        Calls every other ``fetch_*`` method of the instance and merges the
        results; data that could not be fetched is left out.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data
            with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str or None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool or int
        :return: section codes for overhead line electrification (OLE) installations
        :rtype: dict

        **Example**::

            >>> from pyrcs.line_data import Electrification

            >>> elec = Electrification()

            >>> electrification_codes = elec.fetch_elec_codes()

            >>> print(list(electrification_codes.keys()))
            ['Electrification', 'Last updated date']
        """
        # Check the connection once rather than once per fetch method
        if is_internet_connected():
            verbose_ = self._nested_verbose(verbose, data_dir)
        else:
            verbose_ = False

        fetched = [
            getattr(self, name)(update=update, verbose=verbose_)
            for name in dir(self)
            if name.startswith('fetch_') and name != 'fetch_elec_codes']

        ole_section_codes = self._merge_fetched(self.Key, self.LUDKey, fetched)

        if pickle_it and data_dir:
            pickle_filename = self.Name.lower().replace(" ", "-") + ".pickle"
            self.CurrentDataDir = validate_input_data_dir(data_dir)
            path_to_pickle = os.path.join(self.CurrentDataDir, pickle_filename)
            save_pickle(ole_section_codes, path_to_pickle, verbose=verbose)

        return ole_section_codes