# Source code for the ``depot`` module (rendered by Sphinx as "Source code for depot")

"""
Collect `depots codes <http://www.railwaycodes.org.uk/depots/depots0.shtm>`_.
"""

import copy
import os
import re
import socket
import urllib.error
import urllib.parse

import bs4
import pandas as pd
import requests
from pyhelpers.dir import cd, validate_input_data_dir
from pyhelpers.ops import confirmed, fake_requests_headers
from pyhelpers.store import load_pickle, save_pickle

from pyrcs.utils import cd_dat, get_catalogue, get_last_updated_date, homepage_url, \
    print_conn_err, is_internet_connected, print_connection_error


class Depots:
    """
    A class for collecting depot codes.

    :param data_dir: name of data directory, defaults to ``None``
    :type data_dir: str or None
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the class is
        instantiated, defaults to ``True``
    :type verbose: bool or int

    **Example**::

        >>> from pyrcs.other_assets import Depots

        >>> depots = Depots()

        >>> print(depots.Name)
        Depot codes

        >>> print(depots.SourceURL)
        http://www.railwaycodes.org.uk/depots/depots0.shtm
    """

    def __init__(self, data_dir=None, update=False, verbose=True):
        """
        Constructor method.
        """
        # Warn early (without raising) if there is no internet connection
        if not is_internet_connected():
            print_connection_error(verbose=verbose)

        self.Name = 'Depot codes'
        self.Key = 'Depots'

        self.HomeURL = homepage_url()
        self.SourceURL = urllib.parse.urljoin(self.HomeURL, '/depots/depots0.shtm')

        self.LUDKey = 'Last updated date'  # key to last updated date
        self.Date = get_last_updated_date(url=self.SourceURL, parsed=True,
                                          as_date_type=False)

        self.Catalogue = get_catalogue(page_url=self.SourceURL, update=update,
                                       confirmation_required=False)

        if data_dir:
            self.DataDir = validate_input_data_dir(data_dir)
        else:
            self.DataDir = cd_dat("other-assets", self.Key.lower())
        self.CurrentDataDir = copy.copy(self.DataDir)

        # The first catalogue entry is the index page; the remaining four are the
        # data pages handled by this class
        self.TCTKey, self.FDPTKey, self.S1950Key, self.GWRKey = \
            list(self.Catalogue.keys())[1:]

        # Pickle filenames derived from the catalogue keys
        self.TCTPickle = self.TCTKey.replace(" ", "-").lower()
        self.FDPTPickle = re.sub(r'[ -]', '-', self.FDPTKey).lower()
        self.S1950Pickle = re.sub(r' \(|\) | ', '-', self.S1950Key).lower()
        self.GWRPickle = self.GWRKey.replace(" ", "-").lower()

    def _cdd_depots(self, *sub_dir, **kwargs):
        """
        Change directory to package data directory and sub-directories (and/or a file).

        The directory for this module: ``"\\dat\\other-assets\\depots"``.

        :param sub_dir: sub-directory or sub-directories (and/or a file)
        :type sub_dir: str
        :param kwargs: optional parameters of
            `os.makedirs <https://docs.python.org/3/library/os.html#os.makedirs>`_,
            e.g. ``mode=0o777``
        :return: path to the backup data directory for ``Depots``
        :rtype: str

        :meta private:
        """
        path = cd(self.DataDir, *sub_dir, mkdir=True, **kwargs)

        return path
[docs] def collect_two_char_tops_codes(self, confirmation_required=True, verbose=False): """ Collect `two-character TOPS codes <http://www.railwaycodes.org.uk/depots/depots1.shtm>`_ from source web page. :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool, int :return: data of two-character TOPS codes and date of when the data was last updated :rtype: dict or None **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> two_char_tops_codes_dat = depots.collect_two_char_tops_codes() To collect data of two character TOPS codes? [No]|Yes: yes >>> type(two_char_tops_codes_dat) <class 'dict'> >>> print(list(two_char_tops_codes_dat.keys())) ['Two character TOPS codes', 'Last updated date'] """ if confirmed("To collect data of {}?".format( self.TCTKey[:1].lower() + self.TCTKey[1:]), confirmation_required=confirmation_required): url = self.Catalogue[self.TCTKey] if verbose == 2: print("Collecting data of {}".format( self.TCTKey[:1].lower() + self.TCTKey[1:]), end=" ... ") two_char_tops_codes_data = None try: header, two_char_tops_codes = pd.read_html(url, na_values=[''], keep_default_na=False) except (urllib.error.URLError, socket.gaierror): print("Failed. ") if verbose == 2 else "" print_conn_err(verbose=verbose) else: try: two_char_tops_codes.columns = header.columns.to_list() two_char_tops_codes.fillna('', inplace=True) last_updated_date = get_last_updated_date(url) print("Done. ") if verbose == 2 else "" two_char_tops_codes_data = {self.TCTKey: two_char_tops_codes, self.LUDKey: last_updated_date} path_to_pickle = self._cdd_depots(self.TCTPickle + ".pickle") save_pickle(two_char_tops_codes_data, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}".format(e)) return two_char_tops_codes_data
[docs] def fetch_two_char_tops_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False): """ Fetch `two-character TOPS codes <http://www.railwaycodes.org.uk/depots/depots1.shtm>`_ from local backup. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False`` :type pickle_it: bool :param data_dir: name of package data folder, defaults to ``None`` :type data_dir: str or None :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool :return: data of two-character TOPS codes and date of when the data was last updated :rtype: dict **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> two_char_tops_codes_dat = depots.fetch_two_char_tops_codes() >>> type(two_char_tops_codes_dat) <class 'dict'> >>> print(list(two_char_tops_codes_dat.keys())) ['Two character TOPS codes', 'Last updated date'] """ path_to_pickle = self._cdd_depots(self.TCTPickle + ".pickle") if os.path.isfile(path_to_pickle) and not update: two_char_tops_codes_data = load_pickle(path_to_pickle) else: verbose_ = False if data_dir or not verbose else (2 if verbose == 2 else True) two_char_tops_codes_data = self.collect_two_char_tops_codes( confirmation_required=False, verbose=verbose_) if two_char_tops_codes_data: if pickle_it and data_dir: self.CurrentDataDir = validate_input_data_dir(data_dir) path_to_pickle = os.path.join( self.CurrentDataDir, self.TCTPickle + ".pickle") save_pickle(two_char_tops_codes_data, path_to_pickle, verbose=verbose) else: print("No data of {} has been freshly collected.".format( self.TCTKey[:1].lower() + self.TCTKey[1:])) two_char_tops_codes_data = load_pickle(path_to_pickle) return two_char_tops_codes_data
[docs] def collect_four_digit_pre_tops_codes(self, confirmation_required=True, verbose=False): """ Collect `four-digit pre-TOPS codes <http://www.railwaycodes.org.uk/depots/depots2.shtm>`_ from source web page. :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool, int :return: data of two-character TOPS codes and date of when the data was last updated :rtype: dict or None **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> four_digit_pre_tops_codes_dat = depots.collect_four_digit_pre_tops_codes() To collect data of four digit pre-TOPS codes? [No]|Yes: yes >>> type(four_digit_pre_tops_codes_dat) <class 'dict'> >>> print(list(four_digit_pre_tops_codes_dat.keys())) ['Four digit pre-TOPS codes', 'Last updated date'] >>> type(four_digit_pre_tops_codes_dat['Four digit pre-TOPS codes']) <class 'dict'> """ if confirmed("To collect data of {}?".format( self.FDPTKey[:1].lower() + self.FDPTKey[1:]), confirmation_required=confirmation_required): path_to_pickle = self._cdd_depots(self.FDPTPickle + ".pickle") url = self.Catalogue[self.FDPTKey] if verbose == 2: print("Collecting data of {}".format( self.FDPTKey[:1].lower() + self.FDPTKey[1:]), end=" ... ") four_digit_pre_tops_codes_data = None try: source = requests.get(url, headers=fake_requests_headers()) except requests.ConnectionError: print("Failed. 
") if verbose == 2 else "" print_conn_err(verbose=verbose) else: try: p_tags = bs4.BeautifulSoup(source.text, 'lxml').find_all('p') region_names = [x.text.replace('Jump to: ', '').strip().split(' | ') for x in p_tags if x.text.startswith('Jump to: ')][0] data_sets = iter( pd.read_html(source.text, na_values=[''], keep_default_na=False)) four_digit_pre_tops_codes_list = [] for x in data_sets: header, four_digit_pre_tops_codes_data = x, next(data_sets) four_digit_pre_tops_codes_data.columns = header.columns.to_list() four_digit_pre_tops_codes_list.append( four_digit_pre_tops_codes_data) last_updated_date = get_last_updated_date(url) print("Done. ") if verbose == 2 else "" four_digit_pre_tops_codes_data = { self.FDPTKey: dict(zip(region_names, four_digit_pre_tops_codes_list)), self.LUDKey: last_updated_date} save_pickle(four_digit_pre_tops_codes_data, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}".format(e)) return four_digit_pre_tops_codes_data
[docs] def fetch_four_digit_pre_tops_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False): """ Fetch `four-digit pre-TOPS codes <http://www.railwaycodes.org.uk/depots/depots2.shtm>`_ from local backup. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False`` :type pickle_it: bool :param data_dir: name of package data folder, defaults to ``None`` :type data_dir: str or None :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool :return: data of two-character TOPS codes and date of when the data was last updated :rtype: dict **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> four_digit_pre_tops_codes_dat = depots.fetch_four_digit_pre_tops_codes() >>> type(four_digit_pre_tops_codes_dat) <class 'dict'> >>> print(list(four_digit_pre_tops_codes_dat.keys())) ['Four digit pre-TOPS codes', 'Last updated date'] >>> four_digit_pre_tops_codes = \ ... 
four_digit_pre_tops_codes_dat['Four digit pre-TOPS codes'] >>> print(list(four_digit_pre_tops_codes.keys())) ['Main Works', 'London Midland Region', 'Western Region', 'Southern Region', 'Eastern Region', 'Scottish Region'] """ path_to_pickle = self._cdd_depots(self.FDPTPickle + ".pickle") if os.path.isfile(path_to_pickle) and not update: four_digit_pre_tops_codes_data = load_pickle(path_to_pickle) else: verbose_ = False if data_dir or not verbose else (2 if verbose == 2 else True) four_digit_pre_tops_codes_data = self.collect_four_digit_pre_tops_codes( confirmation_required=False, verbose=verbose_) if four_digit_pre_tops_codes_data: if pickle_it and data_dir: self.CurrentDataDir = validate_input_data_dir(data_dir) path_to_pickle = os.path.join( self.CurrentDataDir, os.path.basename(path_to_pickle)) save_pickle(four_digit_pre_tops_codes_data, path_to_pickle, verbose=verbose) else: print("No data of {} has been freshly collected.".format( self.FDPTKey[:1].lower() + self.FDPTKey[1:])) four_digit_pre_tops_codes_data = load_pickle(path_to_pickle) return four_digit_pre_tops_codes_data
[docs] def collect_1950_system_codes(self, confirmation_required=True, verbose=False): """ Collect `1950 system (pre-TOPS) codes <http://www.railwaycodes.org.uk/depots/depots3.shtm>`_ from source web page. :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool, int :return: data of 1950 system (pre-TOPS) codes and date of when the data was last updated :rtype: dict or None **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> system_1950_codes_dat = depots.collect_1950_system_codes() To collect data of 1950 system (pre-TOPS) codes? [No]|Yes: yes >>> type(system_1950_codes_dat) <class 'dict'> >>> print(list(system_1950_codes_dat.keys())) ['1950 system (pre-TOPS) codes', 'Last updated date'] """ if confirmed("To collect data of {}?".format(self.S1950Key), confirmation_required=confirmation_required): url = self.Catalogue[self.S1950Key] if verbose == 2: print("Collecting data of {}".format(self.S1950Key), end=" ... ") system_1950_codes_data = None try: header, system_1950_codes = pd.read_html(url, na_values=[''], keep_default_na=False) except (urllib.error.URLError, socket.gaierror): print("Failed. ") if verbose == 2 else "" print_conn_err(verbose=verbose) else: try: system_1950_codes.columns = header.columns.to_list() last_updated_date = get_last_updated_date(url) print("Done. ") if verbose == 2 else "" system_1950_codes_data = {self.S1950Key: system_1950_codes, self.LUDKey: last_updated_date} path_to_pickle = self._cdd_depots(self.S1950Pickle + ".pickle") save_pickle(system_1950_codes_data, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}".format(e)) return system_1950_codes_data
[docs] def fetch_1950_system_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False): """ Fetch `1950 system (pre-TOPS) codes <http://www.railwaycodes.org.uk/depots/depots3.shtm>`_ from local backup. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False`` :type pickle_it: bool :param data_dir: name of package data folder, defaults to ``None`` :type data_dir: str or None :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool :return: data of 1950 system (pre-TOPS) codes and date of when the data was last updated :rtype: dict **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> system_1950_codes_dat = depots.fetch_1950_system_codes() >>> system_1950_codes = system_1950_codes_dat['1950 system (pre-TOPS) codes'] >>> type(system_1950_codes) <class 'pandas.core.frame.DataFrame'> >>> print(system_1950_codes.head()) Code Depot Notes 0 1A Willesden From 1950. Became WN from 6 May 1973 1 1B Camden From 1950. To 3 January 1966 2 1C Watford From 1950. Became WJ from 6 May 1973 3 1D Devons Road, Bow Previously 13B to 9 June 1950. Became 1J from ... 4 1D Marylebone Previously 14F to 31 August 1963. Became ME fr... 
""" path_to_pickle = self._cdd_depots(self.S1950Pickle + ".pickle") if os.path.isfile(path_to_pickle) and not update: system_1950_codes_data = load_pickle(path_to_pickle) else: verbose_ = False if data_dir or not verbose else (2 if verbose == 2 else True) system_1950_codes_data = self.collect_1950_system_codes( confirmation_required=False, verbose=verbose_) if system_1950_codes_data: if pickle_it and data_dir: self.CurrentDataDir = validate_input_data_dir(data_dir) path_to_pickle = os.path.join( self.CurrentDataDir, os.path.basename(path_to_pickle)) save_pickle(system_1950_codes_data, path_to_pickle, verbose=verbose) else: print("No data of {} has been freshly collected.".format(self.S1950Key)) system_1950_codes_data = load_pickle(path_to_pickle) return system_1950_codes_data
[docs] def collect_gwr_codes(self, confirmation_required=True, verbose=False): """ Collect `Great Western Railway (GWR) depot codes <http://www.railwaycodes.org.uk/depots/depots4.shtm>`_ from source web page. :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool, int :return: data of GWR depot codes and date of when the data was last updated :rtype: dict or None **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> gwr_codes_dat = depots.collect_gwr_codes() To collect data of GWR codes? [No]|Yes: yes >>> type(gwr_codes_dat) <class 'dict'> >>> print(list(gwr_codes_dat.keys())) ['GWR codes', 'Last updated date'] """ if confirmed("To collect data of {}?".format(self.GWRKey), confirmation_required=confirmation_required): url = self.Catalogue[self.GWRKey] if verbose == 2: print("Collecting data of {}".format(self.GWRKey), end=" ... ") gwr_codes_data = None try: header, alphabetical_codes, numerical_codes_1, _, numerical_codes_2 = \ pd.read_html(url) except (urllib.error.URLError, socket.gaierror): print("Failed. ") if verbose == 2 else "" print_conn_err(verbose=verbose) else: try: # Alphabetical codes alphabetical_codes.columns = header.columns.to_list() # Numerical codes numerical_codes_1.drop(1, axis=1, inplace=True) numerical_codes_1.columns = header.columns.to_list() numerical_codes_2.columns = header.columns.to_list() numerical_codes = pd.concat([numerical_codes_1, numerical_codes_2]) source = requests.get(url) soup = bs4.BeautifulSoup(source.text, 'lxml') gwr_codes = dict(zip([x.text for x in soup.find_all('h3')], [alphabetical_codes, numerical_codes])) last_updated_date = get_last_updated_date(url) print("Done. 
") if verbose == 2 else "" gwr_codes_data = {self.GWRKey: gwr_codes, self.LUDKey: last_updated_date} path_to_pickle = self._cdd_depots(self.GWRPickle + ".pickle") save_pickle(gwr_codes_data, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}".format(e)) return gwr_codes_data
[docs] def fetch_gwr_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False): """ Fetch `Great Western Railway (GWR) depot codes <http://www.railwaycodes.org.uk/depots/depots4.shtm>`_ from local backup. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False`` :type pickle_it: bool :param data_dir: name of package data folder, defaults to ``None`` :type data_dir: str or None :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool :return: data of GWR depot codes and date of when the data was last updated :rtype: dict **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> gwr_codes_dat = depots.fetch_gwr_codes() >>> gwr_codes = gwr_codes_dat['GWR codes'] >>> type(gwr_codes) <class 'dict'> >>> print(list(gwr_codes.keys())) ['Alphabetical codes', 'Numerical codes'] >>> gwr_codes_alpha = gwr_codes['Alphabetical codes'] >>> type(gwr_codes_alpha) <class 'pandas.core.frame.DataFrame'> >>> print(gwr_codes_alpha.head()) Code Depot name 0 ABEEG Aberbeeg 1 ABG Aberbeeg 2 AYN Abercynon 3 ABDR Aberdare 4 ABH Aberystwyth """ path_to_pickle = self._cdd_depots(self.GWRPickle + ".pickle") if os.path.isfile(path_to_pickle) and not update: gwr_codes_data = load_pickle(path_to_pickle) else: verbose_ = False if data_dir or not verbose else (2 if verbose == 2 else True) gwr_codes_data = self.collect_gwr_codes(confirmation_required=False, verbose=verbose_) if gwr_codes_data: if pickle_it and data_dir: self.CurrentDataDir = validate_input_data_dir(data_dir) path_to_pickle = os.path.join( self.CurrentDataDir, os.path.basename(path_to_pickle)) save_pickle(gwr_codes_data, path_to_pickle, verbose=verbose) else: print("No data of \"{}\" has been freshly collected.".format(self.GWRKey)) gwr_codes_data = 
load_pickle(path_to_pickle) return gwr_codes_data
[docs] def fetch_depot_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False): """ Fetch `depots codes <http://www.railwaycodes.org.uk/depots/depots0.shtm>`_ from local backup. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param pickle_it: whether to replace the current package data with newly collected data, defaults to ``False`` :type pickle_it: bool :param data_dir: name of package data folder, defaults to ``None`` :type data_dir: str or None :param verbose: whether to print relevant information in console as the function runs, defaults to ``False`` :type verbose: bool :return: data of depot codes and date of when the data was last updated :rtype: dict **Example**:: >>> from pyrcs.other_assets import Depots >>> depots = Depots() >>> depot_codes_dat = depots.fetch_depot_codes() >>> type(depot_codes_dat) <class 'dict'> >>> print(list(depot_codes_dat.keys())) ['Depots', 'Last updated date'] """ verbose_ = False if (data_dir or not verbose) else (2 if verbose == 2 else True) depot_codes = [] for func in dir(self): if func.startswith('fetch_') and func != 'fetch_depot_codes': depot_codes.append(getattr(self, func)( update=update, verbose=verbose_ if is_internet_connected() else False)) depot_codes_data = { self.Key: {next(iter(x)): next(iter(x.values())) for x in depot_codes}, self.LUDKey: self.Date} if pickle_it and data_dir: self.CurrentDataDir = validate_input_data_dir(data_dir) path_to_pickle = os.path.join( self.CurrentDataDir, self.Key.lower() + ".pickle") save_pickle(depot_codes_data, path_to_pickle, verbose=verbose) return depot_codes_data