Source code for pyrcs.other_assets.depots

""" Collect depot codes.

Data source: http://www.railwaycodes.org.uk/depots/depots0.shtm
"""

import copy
import os
import re
import urllib.parse

import bs4
import pandas as pd
import requests
from pyhelpers.dir import cd, validate_input_data_dir
from pyhelpers.ops import confirmed, fake_requests_headers
from pyhelpers.store import load_pickle, save_pickle

from pyrcs.utils import cd_dat, get_catalogue, get_last_updated_date, homepage_url


class Depots:
    """
    A class for collecting depot codes.

    :param data_dir: name of data directory, defaults to ``None``
    :type data_dir: str, None
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool

    **Example**::

        from pyrcs.other_assets import Depots

        depots = Depots()

        print(depots.Name)
        # Depot codes

        print(depots.SourceURL)
        # http://www.railwaycodes.org.uk/depots/depots0.shtm
    """

    def __init__(self, data_dir=None, update=False):
        """
        Constructor method.
        """
        self.Name = 'Depot codes'
        self.HomeURL = homepage_url()
        self.SourceURL = urllib.parse.urljoin(self.HomeURL, '/depots/depots0.shtm')

        self.Catalogue = get_catalogue(self.SourceURL, update=update,
                                       confirmation_required=False)

        self.Date = get_last_updated_date(self.SourceURL, parsed=True, as_date_type=False)

        self.Key = 'Depots'
        self.LUDKey = 'Last updated date'  # key to last updated date

        self.DataDir = validate_input_data_dir(data_dir) if data_dir \
            else cd_dat("other-assets", self.Key.lower())
        self.CurrentDataDir = copy.copy(self.DataDir)

        # The first catalogue entry is skipped; exactly four further entries are expected
        # (the unpacking raises ValueError otherwise).
        self.TCTKey, self.FDPTKey, self.S1950Key, self.GWRKey = \
            list(self.Catalogue.keys())[1:]

        # Pickle file stems derived from the catalogue keys.
        self.TCTPickle = self.TCTKey.replace(" ", "-").lower()
        self.FDPTPickle = re.sub(r'[ -]', '-', self.FDPTKey).lower()
        self.S1950Pickle = re.sub(r' \(|\) | ', '-', self.S1950Key).lower()
        self.GWRPickle = self.GWRKey.replace(" ", "-").lower()

    def cdd_depots(self, *sub_dir, **kwargs):
        """
        Change directory to "dat\\other-assets\\depots\\" and sub-directories (and/or a file)

        :param sub_dir: sub-directory or sub-directories (and/or a file)
        :type sub_dir: str
        :param kwargs: optional parameters of
            `os.makedirs <https://docs.python.org/3/library/os.html#os.makedirs>`_,
            e.g. ``mode=0o777``
        :return: path to the backup data directory for ``Depots``
        :rtype: str

        :meta private:
        """
        return cd(self.DataDir, *sub_dir, mkdir=True, **kwargs)

    # -- Private helpers ---------------------------------------------------------------
    # NOTE: none of these names may start with "fetch_", because ``fetch_depot_codes``
    # discovers the per-category fetchers by that prefix.

    @staticmethod
    def _lower_first(text):
        """Return ``text`` with only its first character lower-cased."""
        return text[:1].lower() + text[1:]

    @staticmethod
    def _parse_header_and_table(url, fill_na=False):
        """
        Read a page made of exactly two HTML tables (a header table and a data table)
        and return the data table carrying the header table's column names.

        :param url: URL of the web page
        :type url: str
        :param fill_na: whether to replace missing values with ``''``, defaults to ``False``
        :type fill_na: bool
        :return: the data table
        :rtype: pandas.DataFrame
        """
        # Unpacking raises ValueError if the page does not hold exactly two tables.
        header, codes = pd.read_html(url, na_values=[''], keep_default_na=False)
        codes.columns = header.columns.to_list()
        if fill_na:
            codes.fillna('', inplace=True)
        return codes

    @staticmethod
    def _parse_four_digit_pre_tops_codes(url):
        """
        Parse the four-digit pre-TOPS codes page into one table per region.

        :param url: URL of the web page
        :type url: str
        :return: tables of codes keyed by the region names listed after "Jump to: "
        :rtype: dict
        """
        source = requests.get(url, headers=fake_requests_headers())

        p_tags = bs4.BeautifulSoup(source.text, 'lxml').find_all('p')
        # Region names come from the first paragraph starting with "Jump to: ";
        # the [0] raises IndexError if there is no such paragraph.
        region_names = [p.text.replace('Jump to: ', '').strip().split(' | ')
                        for p in p_tags if p.text.startswith('Jump to: ')][0]

        # Tables are consumed in (header, data) pairs.  An odd number of tables makes
        # ``next`` raise StopIteration, which the caller reports as a failure.
        tables = iter(pd.read_html(source.text, na_values=[''], keep_default_na=False))
        codes_by_region = []
        for header in tables:
            codes = next(tables)
            codes.columns = header.columns.to_list()
            codes_by_region.append(codes)

        return dict(zip(region_names, codes_by_region))

    @staticmethod
    def _parse_gwr_codes(url):
        """
        Parse the GWR codes page into alphabetical and numerical code tables.

        :param url: URL of the web page
        :type url: str
        :return: the two tables keyed by the text of the page's ``<h3>`` headings
        :rtype: dict
        """
        # Exactly five tables are expected; the fourth one is discarded.
        header, alphabetical_codes, numerical_codes_1, _, numerical_codes_2 = \
            pd.read_html(url)

        # Alphabetical codes
        alphabetical_codes.columns = header.columns.to_list()

        # Numerical codes (the column labelled 1 of the first part is dropped so that
        # both parts share the header's columns)
        numerical_codes_1.drop(1, axis=1, inplace=True)
        numerical_codes_1.columns = header.columns.to_list()
        numerical_codes_2.columns = header.columns.to_list()
        numerical_codes = pd.concat([numerical_codes_1, numerical_codes_2])

        source = requests.get(url)
        soup = bs4.BeautifulSoup(source.text, 'lxml')
        section_titles = [h3.text for h3 in soup.find_all('h3')]

        return dict(zip(section_titles, [alphabetical_codes, numerical_codes]))

    def _collect_codes(self, key, label, pickle_name, parse, confirmation_required, verbose):
        """
        Shared implementation of the ``collect_*`` methods.

        :param key: catalogue key of the data category (also the key of the result)
        :type key: str
        :param label: wording of the category used in console messages
        :type label: str
        :param pickle_name: pickle file name without the ".pickle" extension
        :type pickle_name: str
        :param parse: callable taking the page URL and returning the parsed codes
        :param confirmation_required: whether to prompt a message for confirmation to proceed
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console;
            progress messages are printed only when it equals ``2``
        :type verbose: bool, int
        :return: ``{key: <codes>, 'Last updated date': <date>}``, or ``None`` if the
            collection was not confirmed or failed
        :rtype: dict, None
        """
        if not confirmed("To collect data of {}?".format(label),
                         confirmation_required=confirmation_required):
            # Explicit, so that the result is never an unbound local.
            return None

        url = self.Catalogue[key]

        if verbose == 2:
            print("Collecting data of {}".format(label), end=" ... ")

        try:
            codes = parse(url)
            last_updated_date = get_last_updated_date(url)
            collected_data = {key: codes, self.LUDKey: last_updated_date}

            if verbose == 2:
                print("Done. ")

            path_to_pickle = self.cdd_depots(pickle_name + ".pickle")
            save_pickle(collected_data, path_to_pickle, verbose=verbose)

        except Exception as e:
            # Best-effort by design: any failure (network, page layout, saving) is
            # reported on the console and signalled to the caller with ``None``.
            print("Failed. {}".format(e))
            collected_data = None

        return collected_data

    def _fetch_codes(self, pickle_name, collect, no_data_message,
                     update, pickle_it, data_dir, verbose):
        """
        Shared implementation of the per-category ``fetch_*`` methods.

        The local pickle is loaded when it exists and ``update`` is false; otherwise
        ``collect`` is called (without asking for confirmation).

        :param pickle_name: pickle file name without the ".pickle" extension
        :type pickle_name: str
        :param collect: the matching ``collect_*`` method
        :param no_data_message: message printed when ``collect`` returns nothing
        :type no_data_message: str
        :param update: whether to collect afresh even if a local pickle exists
        :type update: bool
        :param pickle_it: whether to also save newly collected data under ``data_dir``
        :type pickle_it: bool
        :param data_dir: name of package data folder
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console
        :type verbose: bool
        :return: the loaded or collected data, ``None`` if nothing could be collected
        :rtype: dict, None
        """
        path_to_pickle = self.cdd_depots(pickle_name + ".pickle")

        if os.path.isfile(path_to_pickle) and not update:
            return load_pickle(path_to_pickle)

        # The collector is silenced when a custom ``data_dir`` is given.
        fetched_data = collect(confirmation_required=False,
                               verbose=bool(verbose) and not data_dir)

        if fetched_data:
            if pickle_it and data_dir:
                self.CurrentDataDir = validate_input_data_dir(data_dir)
                path_to_pickle = os.path.join(self.CurrentDataDir,
                                              os.path.basename(path_to_pickle))
                save_pickle(fetched_data, path_to_pickle, verbose=verbose)
        else:
            print(no_data_message)

        return fetched_data

    # -- Two-character TOPS codes ------------------------------------------------------

    def collect_two_char_tops_codes(self, confirmation_required=True, verbose=False):
        """
        Collect two-character TOPS codes from source web page.

        :param confirmation_required: whether to prompt a message for confirmation to proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool, int
        :return: data of two-character TOPS codes and date of when the data was last updated
            (``None`` if not confirmed or if the collection failed)
        :rtype: dict, None

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            confirmation_required = True

            two_char_tops_codes_data = depots.collect_two_char_tops_codes(confirmation_required)
            # To collect data of two character TOPS codes? [No]|Yes:
            # >? yes

            print(two_char_tops_codes_data)
            # {'Two character TOPS codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._collect_codes(
            key=self.TCTKey, label=self._lower_first(self.TCTKey),
            pickle_name=self.TCTPickle,
            parse=lambda url: self._parse_header_and_table(url, fill_na=True),
            confirmation_required=confirmation_required, verbose=verbose)

    def fetch_two_char_tops_codes(self, update=False, pickle_it=False, data_dir=None,
                                  verbose=False):
        """
        Fetch two-character TOPS codes from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data,
            defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool
        :return: data of two-character TOPS codes and date of when the data was last updated
        :rtype: dict

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            update = False
            pickle_it = False
            data_dir = None

            two_char_tops_codes_data = depots.fetch_two_char_tops_codes(update, pickle_it, data_dir)

            print(two_char_tops_codes_data)
            # {'Two character TOPS codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            pickle_name=self.TCTPickle, collect=self.collect_two_char_tops_codes,
            no_data_message="No data of {} has been collected.".format(
                self._lower_first(self.TCTKey)),
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose)

    # -- Four-digit pre-TOPS codes -----------------------------------------------------

    def collect_four_digit_pre_tops_codes(self, confirmation_required=True, verbose=False):
        """
        Collect four-digit pre-TOPS codes from source web page.

        :param confirmation_required: whether to prompt a message for confirmation to proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool, int
        :return: data of four-digit pre-TOPS codes (one table per region) and date of when
            the data was last updated (``None`` if not confirmed or if the collection failed)
        :rtype: dict, None

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            confirmation_required = True

            four_digit_pre_tops_codes = depots.collect_four_digit_pre_tops_codes(confirmation_required)
            # To collect data of four digit pre-TOPS codes? [No]|Yes:
            # >? yes

            print(four_digit_pre_tops_codes)
            # {'Four digit pre-TOPS codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._collect_codes(
            key=self.FDPTKey, label=self._lower_first(self.FDPTKey),
            pickle_name=self.FDPTPickle, parse=self._parse_four_digit_pre_tops_codes,
            confirmation_required=confirmation_required, verbose=verbose)

    def fetch_four_digit_pre_tops_codes(self, update=False, pickle_it=False, data_dir=None,
                                        verbose=False):
        """
        Fetch four-digit pre-TOPS codes from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data,
            defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool
        :return: data of four-digit pre-TOPS codes and date of when the data was last updated
        :rtype: dict

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            update = False
            pickle_it = False
            data_dir = None

            four_digit_pretops_codes = depots.fetch_four_digit_pre_tops_codes(update, pickle_it, data_dir)

            print(four_digit_pretops_codes)
            # {'Four digit pre-TOPS codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            pickle_name=self.FDPTPickle, collect=self.collect_four_digit_pre_tops_codes,
            no_data_message="No data of {} has been collected.".format(
                self._lower_first(self.FDPTKey)),
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose)

    # -- 1950 system (pre-TOPS) codes --------------------------------------------------

    def collect_1950_system_codes(self, confirmation_required=True, verbose=False):
        """
        Collect 1950 system (pre-TOPS) codes from source web page.

        :param confirmation_required: whether to prompt a message for confirmation to proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool, int
        :return: data of 1950 system (pre-TOPS) codes and date of when the data was last updated
            (``None`` if not confirmed or if the collection failed)
        :rtype: dict, None

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            confirmation_required = True

            system_1950_codes_data = depots.collect_1950_system_codes(confirmation_required)
            # To collect data of 1950 system (pre-TOPS) codes? [No]|Yes:
            # >? yes

            print(system_1950_codes_data)
            # {'1950 system (pre-TOPS) codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._collect_codes(
            key=self.S1950Key, label=self.S1950Key, pickle_name=self.S1950Pickle,
            parse=self._parse_header_and_table,
            confirmation_required=confirmation_required, verbose=verbose)

    def fetch_1950_system_codes(self, update=False, pickle_it=False, data_dir=None,
                                verbose=False):
        """
        Fetch 1950 system (pre-TOPS) codes from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data,
            defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool
        :return: data of 1950 system (pre-TOPS) codes and date of when the data was last updated
        :rtype: dict

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            update = False
            pickle_it = False
            data_dir = None

            system_1950_codes_data = depots.fetch_1950_system_codes(update, pickle_it, data_dir)

            print(system_1950_codes_data)
            # {'1950 system (pre-TOPS) codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            pickle_name=self.S1950Pickle, collect=self.collect_1950_system_codes,
            no_data_message="No data of {} has been collected.".format(self.S1950Key),
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose)

    # -- GWR codes ---------------------------------------------------------------------

    def collect_gwr_codes(self, confirmation_required=True, verbose=False):
        """
        Collect Great Western Railway (GWR) depot codes from source web page.

        :param confirmation_required: whether to prompt a message for confirmation to proceed,
            defaults to ``True``
        :type confirmation_required: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool, int
        :return: data of GWR depot codes and date of when the data was last updated
            (``None`` if not confirmed or if the collection failed)
        :rtype: dict, None

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            confirmation_required = True

            gwr_codes_data = depots.collect_gwr_codes(confirmation_required)
            # To collect data of GWR codes? [No]|Yes:
            # >? yes

            print(gwr_codes_data)
            # {'GWR codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._collect_codes(
            key=self.GWRKey, label=self.GWRKey, pickle_name=self.GWRPickle,
            parse=self._parse_gwr_codes,
            confirmation_required=confirmation_required, verbose=verbose)

    def fetch_gwr_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch Great Western Railway (GWR) depot codes from local backup.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data,
            defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool
        :return: data of GWR depot codes and date of when the data was last updated
        :rtype: dict

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            update = False
            pickle_it = False
            data_dir = None

            gwr_codes_data = depots.fetch_gwr_codes(update, pickle_it, data_dir)

            print(gwr_codes_data)
            # {'GWR codes': <codes>,
            #  'Last updated date': <date>}
        """
        return self._fetch_codes(
            pickle_name=self.GWRPickle, collect=self.collect_gwr_codes,
            no_data_message="No data of \"{}\" has been collected.".format(self.GWRKey),
            update=update, pickle_it=pickle_it, data_dir=data_dir, verbose=verbose)

    # -- All categories ----------------------------------------------------------------

    def fetch_depot_codes(self, update=False, pickle_it=False, data_dir=None, verbose=False):
        """
        Fetch depots codes from local backup.

        Every other method of the instance whose name starts with ``fetch_`` is called
        with ``update`` and ``verbose``; the first item of each returned dictionary
        becomes one entry of the result.  Categories for which nothing could be fetched
        are left out.

        :param update: whether to check on update and proceed to update the package data,
            defaults to ``False``
        :type update: bool
        :param pickle_it: whether to replace the current package data with newly collected data,
            defaults to ``False``
        :type pickle_it: bool
        :param data_dir: name of package data folder, defaults to ``None``
        :type data_dir: str, None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool
        :return: data of depot codes and date of when the data was last updated
        :rtype: dict

        **Example**::

            from pyrcs.other_assets import Depots

            depots = Depots()

            update = False
            pickle_it = False
            data_dir = None

            depot_codes = depots.fetch_depot_codes(update, pickle_it, data_dir)

            print(depot_codes)
            # {'Depots': <codes>,
            #  'Last updated date': <date>}
        """
        fetcher_names = [name for name in dir(self)
                         if name.startswith('fetch_') and name != 'fetch_depot_codes']

        codes = [getattr(self, name)(update=update, verbose=verbose)
                 for name in fetcher_names]

        # A fetcher returns None when its data could not be collected; skip those
        # instead of failing on ``iter(None)``.
        depot_codes = {
            self.Key: {next(iter(x)): next(iter(x.values())) for x in codes if x},
            self.LUDKey: self.Date}

        if pickle_it and data_dir:
            self.CurrentDataDir = validate_input_data_dir(data_dir)
            path_to_pickle = os.path.join(self.CurrentDataDir, self.Key.lower() + ".pickle")
            save_pickle(depot_codes, path_to_pickle, verbose=verbose)

        return depot_codes