""" Collecting section codes for OLE installations.
Data source: http://www.railwaycodes.org.uk/electrification/mast_prefix0.shtm
"""
import copy
import itertools
import os
import re
import urllib.parse
import bs4
import pandas as pd
import requests
from pyhelpers.dir import cd, validate_input_data_dir
from pyhelpers.ops import confirmed, fake_requests_headers
from pyhelpers.store import load_pickle, save_pickle
from pyrcs.utils import cd_dat, get_catalogue, get_last_updated_date, homepage_url, parse_tr
class Electrification:
    """
    A class for collecting codes associated with British railway overhead electrification installations.

    :param data_dir: name of data directory, defaults to ``None``
    :type data_dir: str, None
    :param update: whether to check on update and proceed to update the package data, defaults to ``False``
    :type update: bool

    **Example**::

        from pyrcs.line_data import Electrification

        elec = Electrification()

        print(elec.Name)
        # Electrification masts and related features

        print(elec.SourceURL)
        # http://www.railwaycodes.org.uk/electrification/mast_prefix0.shtm
    """

    def __init__(self, data_dir=None, update=False):
        """
        Constructor method.
        """
        self.Name = 'Electrification masts and related features'
        self.HomeURL = homepage_url()
        self.SourceURL = urllib.parse.urljoin(self.HomeURL, '/electrification/mast_prefix0.shtm')
        self.Catalogue = get_catalogue(self.SourceURL, update=update, confirmation_required=False)
        self.Date = get_last_updated_date(self.SourceURL, parsed=True, as_date_type=False)
        self.Key = 'Electrification'
        self.LUDKey = 'Last updated date'  # key to last updated date
        self.DataDir = validate_input_data_dir(data_dir) if data_dir else cd_dat("line-data", self.Key.lower())
        self.CurrentDataDir = copy.copy(self.DataDir)

        # Each "...Key" is both a key of ``self.Catalogue`` and the top-level key of the
        # returned data; each "...Pickle" is the matching pickle file name (no extension).
        self.NationalNetworkKey = 'National network'
        self.NationalNetworkPickle = self.NationalNetworkKey.lower().replace(" ", "-")
        self.IndependentLinesKey = 'Independent lines'
        self.IndependentLinesPickle = self.IndependentLinesKey.lower().replace(" ", "-")
        self.OhnsKey = 'National network neutral sections'
        self.OhnsPickle = self.OhnsKey.lower().replace(" ", "-")
        self.TariffZonesKey = 'National network energy tariff zones'
        self.TariffZonesPickle = self.TariffZonesKey.lower().replace(" ", "-")

    def cdd_elec(self, *sub_dir, **kwargs):
        """
        Change directory to "dat\\line-data\\electrification\\" and sub-directories (and/or a file)

        :param sub_dir: sub-directory or sub-directories (and/or a file)
        :type sub_dir: str
        :param kwargs: optional parameters of `os.makedirs <https://docs.python.org/3/library/os.html#os.makedirs>`_,
            e.g. ``mode=0o777``
        :return: path to the backup data directory for ``Electrification``
        :rtype: str

        :meta private:
        """
        return cd(self.DataDir, *sub_dir, mkdir=True, **kwargs)

    # -- Private helpers ---------------------------------------------------------------------------

    @staticmethod
    def _clean_cell(text, strip=True):
        """
        Tidy the text of one table cell.

        Unwraps stringified single-item lists/tuples (e.g. ``"(['AB'])"`` becomes ``"[AB]"``) and
        removes the literal four-character sequence backslash-x-a-0 left behind by such reprs.

        :param text: raw cell text
        :type text: str
        :param strip: whether to strip leading/trailing whitespace as well, defaults to ``True``
        :type strip: bool
        :return: cleaned cell text
        :rtype: str
        """
        text = re.sub(r'\(?\[\'', '[', text)
        text = re.sub(r'\']\)?', ']', text)
        text = text.replace('\\xa0', '')
        return text.strip() if strip else text

    def _parse_codes_table(self, header_tag, strip=True):
        """
        Parse a codes table whose column names live in ``header_tag`` and whose rows live in the
        next ``<table>`` element.

        :param header_tag: the ``<table>`` tag holding the ``<th>`` column names
        :type header_tag: bs4.element.Tag
        :param strip: whether to strip whitespace from every cell, defaults to ``True``
        :type strip: bool
        :return: cleaned data frame of codes
        :rtype: pandas.DataFrame
        """
        header = [th.text for th in header_tag.find_all('th')]
        rows = header_tag.find_next('table').find_all('tr')
        table = pd.DataFrame(parse_tr(header, rows), columns=header)
        return table.applymap(lambda cell: self._clean_cell(cell, strip=strip))

    @staticmethod
    def _parse_notes_list(ol_tag):
        """
        Turn the items of a notes ``<ol>`` into a dict keyed by the title-cased ``id`` of each
        item's anchor.

        :param ol_tag: the ordered-list tag holding the notes
        :type ol_tag: bs4.element.Tag
        :return: notes keyed by anchor id
        :rtype: dict
        """
        # Bare newline strings between the <li> children are skipped.
        return {item.a.get('id').title(): item.get_text(strip=True).replace('\xa0', '')
                for item in ol_tag if item != '\n'}

    def _fetch_codes(self, collect, pickle_name, no_data_message, update, pickle_it, data_dir, verbose):
        """
        Shared implementation of the ``fetch_codes_for_*`` methods: load the local pickle when
        it exists and no update is requested, otherwise collect the data afresh.

        :param collect: bound ``collect_codes_for_*`` method used when (re)collecting
        :type collect: typing.Callable
        :param pickle_name: pickle file name without the ".pickle" extension
        :type pickle_name: str
        :param no_data_message: message printed when nothing could be collected
        :type no_data_message: str
        :param update: whether to bypass the local pickle and collect afresh
        :type update: bool
        :param pickle_it: whether to also save freshly collected data into ``data_dir``
        :type pickle_it: bool
        :param data_dir: name of package data folder
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console
        :type verbose: bool, int
        :return: the fetched data, or ``None`` if collection failed
        :rtype: dict, None
        """
        pickle_filename = pickle_name + ".pickle"
        path_to_pickle = self.cdd_elec(pickle_filename)

        if os.path.isfile(path_to_pickle) and not update:
            # NOTE: pickle files are only safe to load from a trusted location.
            return load_pickle(path_to_pickle)

        # The collector is kept quiet whenever a custom ``data_dir`` is given.
        codes = collect(confirmation_required=False, verbose=bool(verbose) and not data_dir)

        if codes:
            if pickle_it and data_dir:
                self.CurrentDataDir = validate_input_data_dir(data_dir)
                path_to_pickle = os.path.join(self.CurrentDataDir, pickle_filename)
                save_pickle(codes, path_to_pickle, verbose=verbose)
        else:
            print(no_data_message)

        return codes

    # -- National network --------------------------------------------------------------------------

    def collect_codes_for_national_network(self, confirmation_required=True, verbose=False):
        """
        Collect OLE section codes for National network from source web page.

        :param confirmation_required: whether to require users to confirm and proceed, defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OLE section codes for National network; ``None`` if not confirmed or if collection failed
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            confirmation_required = True

            national_network_ole = elec.collect_codes_for_national_network(confirmation_required)
            # To collect section codes for OLE installations: national network? [No]|Yes:
            # >? yes

            print(national_network_ole)
            # {'National network': <code>,
            #  'Last updated date': <date>}
        """
        if not confirmed("To collect section codes for OLE installations: {}?".format(self.NationalNetworkKey.lower()),
                         confirmation_required=confirmation_required):
            return None

        if verbose == 2:
            print("Collecting the codes for {}".format(self.NationalNetworkKey.lower()), end=" ... ")

        try:
            url = self.Catalogue[self.NationalNetworkKey]
            # The response is closed even if parsing fails.
            with requests.get(url, headers=fake_requests_headers()) as source:
                soup = bs4.BeautifulSoup(source.text, 'lxml')

            national_network_ole_ = {}
            h3 = soup.find('h3')
            while h3:
                header_tag = h3.find_next('table')
                if header_tag:
                    table = self._parse_codes_table(header_tag, strip=False)
                else:
                    table = pd.DataFrame((x.text for x in h3.find_all_next('li')), columns=['Unknown_codes'])

                # Notes
                notes = {'Notes': None}
                next_sibling = h3.find_next_sibling()
                # A heading with no following sibling simply has no paragraph note.
                if next_sibling is not None and next_sibling.name == 'p':
                    next_p = h3.find_next('p')
                    if next_p.find_previous('h3') == h3:
                        notes['Notes'] = next_p.text.replace('\xa0', '')

                note_tag = h3.find_next('h4')
                if note_tag and note_tag.text == 'Notes':
                    # NOTE(review): this reads the first <ol> of the whole page rather than the one
                    # following this <h4>; kept as is to avoid changing the collected data.
                    notes_ = self._parse_notes_list(soup.find('ol'))
                    if notes['Notes'] is None:
                        notes['Notes'] = notes_
                    else:
                        notes['Notes'] = [notes['Notes'], notes_]

                data_key = h3.text.strip()
                national_network_ole_[data_key] = {'Codes': table, **notes}

                h3 = h3.find_next_sibling('h3')

            last_updated_date = get_last_updated_date(url)

            national_network_ole = {self.NationalNetworkKey: national_network_ole_, self.LUDKey: last_updated_date}

            if verbose == 2:
                print("Done. ")

            path_to_pickle = self.cdd_elec(self.NationalNetworkPickle + ".pickle")
            save_pickle(national_network_ole, path_to_pickle, verbose=verbose)

        except Exception as e:
            # Best effort by design: report the failure and signal it with ``None``.
            print("Failed. {}".format(e))
            national_network_ole = None

        return national_network_ole

    def fetch_codes_for_national_network(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch OLE section codes for National network from local backup.

        :param update: whether to check on update and proceed to update the package data, defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OLE section codes for National network
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            update = False
            pickle_it = False
            data_dir = None

            national_network_ole = elec.fetch_codes_for_national_network(update, pickle_it, data_dir)

            print(national_network_ole)
            # {'National network': <code>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            self.collect_codes_for_national_network, self.NationalNetworkPickle,
            "No data of {} has been collected.".format(self.NationalNetworkKey.lower()),
            update, pickle_it, data_dir, verbose)

    # -- Independent lines -------------------------------------------------------------------------

    def get_names_of_independent_lines(self):
        """
        Get names of independent lines.

        :return: a list of independent line names (empty if the "Jump to" paragraph is not found)
        :rtype: list

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            line_names = elec.get_names_of_independent_lines()

            print(line_names)
            # a list of independent line names
        """
        with requests.get(self.Catalogue[self.IndependentLinesKey], headers=fake_requests_headers()) as source:
            soup = bs4.BeautifulSoup(source.text, 'lxml')

        for p in soup.find_all('p'):
            if re.match(r'^Jump to: ', p.text):
                return p.text.replace('Jump to: ', '').split('\xa0| ')

        # No navigation paragraph on the page: honour the documented ``list`` return type.
        return []

    def collect_codes_for_independent_lines(self, confirmation_required=True, verbose=False):
        """
        Collect OLE section codes for independent lines from source web page.

        :param confirmation_required: whether to require users to confirm and proceed, defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OLE section codes for independent lines; ``None`` if not confirmed or if collection failed
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            confirmation_required = True

            independent_lines_ole = elec.collect_codes_for_independent_lines(confirmation_required)
            # To collect section codes for OLE installations: independent lines? [No]|Yes:
            # >? yes

            print(independent_lines_ole)
            # {'Independent lines': <codes>,
            #  'Last updated date': <date>}
        """
        if not confirmed("To collect section codes for OLE installations: {}?".format(self.IndependentLinesKey.lower()),
                         confirmation_required=confirmation_required):
            return None

        if verbose == 2:
            print("Collecting the codes for {}".format(self.IndependentLinesKey.lower()), end=" ... ")

        try:
            url = self.Catalogue[self.IndependentLinesKey]
            with requests.get(url, headers=fake_requests_headers()) as source:
                soup = bs4.BeautifulSoup(source.text, 'lxml')

            independent_lines_ole_ = {}
            h3 = soup.find('h3')
            while h3:
                # Only tables, notes and lists that sit under *this* heading are attributed to it.
                header_tag, table = h3.find_next('table'), None
                if header_tag and header_tag.find_previous('h3') == h3:
                    table = self._parse_codes_table(header_tag, strip=True)

                notes = {'Notes': None}
                h4 = h3.find_next('h4')
                if h4 and h4.find_previous('h3') == h3 and h4.text == 'Notes':
                    notes['Notes'] = self._parse_notes_list(h4.find_next('ol'))

                note_tag = h3.find_next('p')
                if note_tag and note_tag.find_previous('h3') == h3:
                    note_txt = note_tag.text.replace('\xa0', '')
                    if notes['Notes'] is None:
                        notes['Notes'] = note_txt
                    else:
                        notes['Notes'] = [notes['Notes'], note_txt]

                # Without any following paragraph there is nothing to anchor the extra list to.
                ex_note_tag = note_tag.find_next('ol') if note_tag else None
                if ex_note_tag and ex_note_tag.find_previous('h3') == h3:
                    li = pd.DataFrame([re.sub(r'[()]', '', x.text).split(' ', 1)
                                       for x in ex_note_tag.find_all('li')], columns=['Initial', 'Code'])
                    notes.update({'Section codes known at present': li})

                independent_lines_ole_[h3.text] = {'Codes': table, **notes}

                h3 = h3.find_next_sibling('h3')

            last_updated_date = get_last_updated_date(url)

            independent_lines_ole = {self.IndependentLinesKey: independent_lines_ole_,
                                     self.LUDKey: last_updated_date}

            if verbose == 2:
                print("Done. ")

            path_to_pickle = self.cdd_elec(self.IndependentLinesPickle + ".pickle")
            save_pickle(independent_lines_ole, path_to_pickle, verbose=verbose)

        except Exception as e:
            # Best effort by design: report the failure and signal it with ``None``.
            print("Failed. {}".format(e))
            independent_lines_ole = None

        return independent_lines_ole

    def fetch_codes_for_independent_lines(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch OLE section codes for independent lines from local backup.

        :param update: whether to check on update and proceed to update the package data, defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OLE section codes for independent lines
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            update = False
            pickle_it = False
            data_dir = None

            independent_lines_ole = elec.fetch_codes_for_independent_lines(update, pickle_it, data_dir)

            print(independent_lines_ole)
            # {'Independent lines': <codes>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            self.collect_codes_for_independent_lines, self.IndependentLinesPickle,
            "No data of {} has been collected.".format(self.IndependentLinesKey.lower()),
            update, pickle_it, data_dir, verbose)

    # -- Neutral sections (OHNS) -------------------------------------------------------------------

    def collect_codes_for_ohns(self, confirmation_required=True, verbose=False):
        """
        Collect codes for overhead line electrification neutral sections (OHNS) from source web page.

        :param confirmation_required: whether to require users to confirm and proceed, defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OHNS codes; ``None`` if not confirmed or if collection failed
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            confirmation_required = True

            ohns_codes = elec.collect_codes_for_ohns(confirmation_required)
            # To collect section codes for OLE installations: national network neutral sections? [No]|Yes:
            # >? yes

            print(ohns_codes)
            # {'National network neutral sections': <codes>,
            #  'Last updated date': <date>}
        """
        if not confirmed("To collect section codes for OLE installations: {}?".format(self.OhnsKey.lower()),
                         confirmation_required=confirmation_required):
            return None

        if verbose == 2:
            print("Collecting data of {}".format(self.OhnsKey.lower()), end=" ... ")

        try:
            url = self.Catalogue[self.OhnsKey]
            # Exactly two tables are expected: the first supplies the column names
            # for the second, which holds the data rows.
            header, neutral_sections_data = pd.read_html(url)
            neutral_sections_data.columns = header.columns.to_list()
            neutral_sections_data.fillna('', inplace=True)

            ohns_codes = {self.OhnsKey: neutral_sections_data, self.LUDKey: get_last_updated_date(url)}

            if verbose == 2:
                print("Done. ")

            path_to_pickle = self.cdd_elec(self.OhnsPickle + ".pickle")
            save_pickle(ohns_codes, path_to_pickle, verbose=verbose)

        except Exception as e:
            # Best effort by design: report the failure and signal it with ``None``.
            print("Failed. {}".format(e))
            ohns_codes = None

        return ohns_codes

    def fetch_codes_for_ohns(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch codes for overhead line electrification neutral sections (OHNS) from local backup.

        :param update: whether to check on update and proceed to update the package data, defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OHNS codes
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            update = False
            pickle_it = False
            data_dir = None

            ohns_codes = elec.fetch_codes_for_ohns(update, pickle_it, data_dir)

            print(ohns_codes)
            # {'National network neutral sections': <codes>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            self.collect_codes_for_ohns, self.OhnsPickle,
            "No data of section codes for {} has been collected.".format(self.OhnsKey.lower()),
            update, pickle_it, data_dir, verbose)

    # -- Energy tariff zones -----------------------------------------------------------------------

    def collect_codes_for_energy_tariff_zones(self, confirmation_required=True, verbose=False):
        """
        Collect OLE section codes for national network energy tariff zones from source web page.

        :param confirmation_required: whether to require users to confirm and proceed, defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OLE section codes for national network energy tariff zones;
            ``None`` if not confirmed or if collection failed
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            confirmation_required = True

            etz_ole = elec.collect_codes_for_energy_tariff_zones(confirmation_required)
            # To collect section codes for OLE installations: national network energy tariff zones? [No]|Yes:
            # >? yes

            print(etz_ole)
            # {'National network energy tariff zones': <codes>,
            #  'Last updated date': <date>}
        """
        if not confirmed("To collect section codes for OLE installations: {}?".format(self.TariffZonesKey.lower()),
                         confirmation_required=confirmation_required):
            return None

        if verbose == 2:
            print("Collecting the codes for {}".format(self.TariffZonesKey.lower()), end=" ... ")

        try:
            url = self.Catalogue[self.TariffZonesKey]
            with requests.get(url, headers=fake_requests_headers()) as source:
                soup = bs4.BeautifulSoup(source.text, 'lxml')

            etz_ole_ = {}
            h3 = soup.find('h3')
            while h3:
                header_tag, table = h3.find_next('table'), None
                if header_tag and header_tag.find_previous('h3') == h3:
                    table = self._parse_codes_table(header_tag, strip=True)

                # Gather every paragraph that sits under this heading.
                notes = []
                next_p = h3.find_next('p')
                while next_p is not None and next_p.find_previous('h3') == h3:
                    notes.append(next_p.text.replace('\xa0', ''))
                    next_p = next_p.find_next('p')
                notes = ' '.join(notes).strip()

                # NOTE(review): 'Notes' is a single shared key, so with several headings only the
                # last heading's notes survive; kept as is to preserve the returned structure.
                etz_ole_.update({h3.text: table, 'Notes': notes})

                h3 = h3.find_next_sibling('h3')

            etz_ole = {self.TariffZonesKey: etz_ole_, self.LUDKey: get_last_updated_date(url)}

            if verbose == 2:
                print("Done. ")

            path_to_pickle = self.cdd_elec(self.TariffZonesPickle + ".pickle")
            save_pickle(etz_ole, path_to_pickle, verbose=verbose)

        except Exception as e:
            # Best effort by design: report the failure and signal it with ``None``.
            print("Failed. {}".format(e))
            etz_ole = None

        return etz_ole

    def fetch_codes_for_energy_tariff_zones(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch OLE section codes for national network energy tariff zones from local backup.

        :param update: whether to check on update and proceed to update the package data, defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: OLE section codes for national network energy tariff zones
        :rtype: dict, None

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            update = False
            pickle_it = False
            data_dir = None

            etz_ole = elec.fetch_codes_for_energy_tariff_zones(update, pickle_it, data_dir)

            print(etz_ole)
            # {'National network energy tariff zones': <codes>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            self.collect_codes_for_energy_tariff_zones, self.TariffZonesPickle,
            "No data of {} has been collected.".format(self.TariffZonesKey.lower()),
            update, pickle_it, data_dir, verbose)

    # -- Everything --------------------------------------------------------------------------------

    def fetch_electrification_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch OLE section codes in the electrification catalogue.

        Calls every ``fetch_codes_for_*`` method of the instance and merges the results. Sources
        whose data could not be fetched (``None``) are left out instead of aborting the whole call.

        :param update: whether to check on update and proceed to update the package data, defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
        :type verbose: bool, int
        :return: section codes for overhead line electrification (OLE) installations
        :rtype: dict

        **Example**::

            from pyrcs.line_data import Electrification

            elec = Electrification()

            update = False
            pickle_it = False
            data_dir = None

            ole_section_codes = elec.fetch_electrification_codes(update, pickle_it, data_dir)

            print(ole_section_codes)
            # {'Electrification': <codes>,
            #  'Last updated date': <date>}
        """
        fetched = [getattr(self, name)(update=update, verbose=verbose)
                   for name in dir(self) if name.startswith('fetch_codes_for_')]
        codes = [x for x in fetched if x]  # drop sources that failed

        # Each result is laid out as {<data key>: <data>, <last-updated key>: <date>}:
        # the first item is the data, the second value is the date.
        dates = [next(itertools.islice(x.values(), 1, 2), None) for x in codes]
        dates = [d for d in dates if d is not None]

        ole_section_codes = {self.Key: {next(iter(x)): next(iter(x.values())) for x in codes},
                             self.LUDKey: max(dates) if dates else None}

        if pickle_it and data_dir:
            pickle_filename = self.Name.lower().replace(" ", "-") + ".pickle"
            self.CurrentDataDir = validate_input_data_dir(data_dir)
            path_to_pickle = os.path.join(self.CurrentDataDir, pickle_filename)
            save_pickle(ole_section_codes, path_to_pickle, verbose=verbose)

        return ole_section_codes