123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- from subprocess import check_call
- import os
- import os.path as op
- import sys
- import shutil as sh
- import yaml
- from nbclean import NotebookCleaner
- from tqdm import tqdm
- import numpy as np
- from glob import glob
- from uuid import uuid4
- import argparse
- DESCRIPTION = ("Convert a collection of Jupyter Notebooks into Jekyll "
- "markdown suitable for a course textbook.")
- # Add path to our utility functions
- this_folder = op.dirname(op.abspath(__file__))
- sys.path.append(op.join(this_folder, 'scripts'))
- from jupyterbook.utils import (_split_yaml, _check_url_page, _prepare_toc,
- _prepare_url, _clean_notebook_cells, _error)
- parser = argparse.ArgumentParser(description=DESCRIPTION)
- parser.add_argument("--site-root", default=None, help="Path to the root of the textbook repository.")
- parser.add_argument("--path-template", default=None, help="Path to the template nbconvert uses to build markdown files")
- parser.add_argument("--path-config", default=None, help="Path to the Jekyll configuration file")
- parser.add_argument("--path-toc", default=None, help="Path to the Table of Contents YAML file")
- parser.add_argument("--overwrite", action='store_true', help="Overwrite md files if they already exist.")
- parser.add_argument("--execute", action='store_true', help="Execute notebooks before converting to MD.")
- parser.add_argument("--local-build", action='store_true',
- help="Specify you are building site locally for later upload.")
- parser.set_defaults(overwrite=False, execute=False)
- # Defaults
- BUILD_FOLDER_NAME = "_build"
- SUPPORTED_FILE_SUFFIXES = ['.ipynb', '.md']
- def _clean_lines(lines, filepath):
- """Replace images with jekyll image root and add escape chars as needed."""
- inline_replace_chars = ['#']
- # Images: replace absolute nbconvert image paths to baseurl paths
- path_rel_root = op.relpath(PATH_SITE_ROOT, op.dirname(filepath))
- path_rel_root_one_up = path_rel_root.replace('../', '', 1)
- for ii, line in enumerate(lines):
- # Handle relative paths because we remove `content/` from the URL
- # If there's a path that goes back to the root, remove a level`
- # This is for images referenced directly in the markdown
- if path_rel_root in line:
- line = line.replace(path_rel_root, path_rel_root_one_up)
- # For programmatically-generated images from notebooks, replace the abspath with relpath
- line = line.replace(PATH_IMAGES_FOLDER, op.relpath(PATH_IMAGES_FOLDER, op.dirname(filepath)))
- # Adding escape slashes since Jekyll removes them when it serves the page
- # Make sure we have at least two dollar signs and they
- # Aren't right next to each other
- dollars = np.where(['$' == char for char in line])[0]
- if len(dollars) > 2 and all(ii > 1 for ii in (dollars[1:] - dollars[:1])):
- for char in inline_replace_chars:
- line = line.replace('\\{}'.format(char), '\\\\{}'.format(char))
- line = line.replace(' \\$', ' \\\\$')
- lines[ii] = line
- return lines
- def _copy_non_content_files():
- """Copy non-markdown/notebook files in the content folder into build folder so relative links work."""
- all_files = glob(op.join(PATH_CONTENT_FOLDER, '**', '*'), recursive=True)
- non_content_files = [ii for ii in all_files if not any(ii.endswith(ext) for ext in SUPPORTED_FILE_SUFFIXES)]
- for ifile in non_content_files:
- if op.isdir(ifile):
- continue
- # The folder name may change if the permalink sanitizing changes it.
- # this ensures that a new folder exists if needed
- new_path = ifile.replace(os.sep + CONTENT_FOLDER_NAME, os.sep + BUILD_FOLDER_NAME)
- if not op.isdir(op.dirname(new_path)):
- os.makedirs(op.dirname(new_path))
- sh.copy2(ifile, new_path)
- def _case_sensitive_fs(path):
- """True when filesystem at `path` is case sensitive, False otherwise.
- Checks this by attempting to write two files, one w/ upper case, one
- with lower. If after this only one file exists, the system is case-insensitive.
- Makes directory `path` if it does not exist.
- """
- if not op.exists(path):
- os.makedirs(path)
- root = op.join(path, uuid4().hex)
- fnames = [root + suffix for suffix in 'aA']
- try:
- for fname in fnames:
- with open(fname, 'wt') as fobj:
- fobj.write('text')
- written = glob(root + '*')
- finally:
- for fname in written:
- os.unlink(fname)
- return len(written) == 2
- if __name__ == '__main__':
- ###############################################################################
- # Default values and arguments
- args = parser.parse_args()
- overwrite = bool(args.overwrite)
- execute = bool(args.execute)
- if args.site_root is None:
- args.site_root = op.join(op.dirname(op.abspath(__file__)), '..')
- # Paths for our notebooks
- PATH_SITE_ROOT = op.abspath(args.site_root)
- PATH_TOC_YAML = args.path_toc if args.path_toc is not None else op.join(PATH_SITE_ROOT, '_data', 'toc.yml')
- CONFIG_FILE = args.path_config if args.path_config is not None else op.join(PATH_SITE_ROOT, '_config.yml')
- PATH_TEMPLATE = args.path_template if args.path_template is not None else op.join(PATH_SITE_ROOT, 'scripts', 'templates', 'jekyllmd.tpl')
- PATH_IMAGES_FOLDER = op.join(PATH_SITE_ROOT, '_build', 'images')
- BUILD_FOLDER = op.join(PATH_SITE_ROOT, BUILD_FOLDER_NAME)
- ###############################################################################
- # Read in textbook configuration
- # Load the yaml for this site
- with open(CONFIG_FILE, 'r') as ff:
- site_yaml = yaml.load(ff.read())
- CONTENT_FOLDER_NAME = site_yaml.get('content_folder_name').strip('/')
- PATH_CONTENT_FOLDER = op.join(PATH_SITE_ROOT, CONTENT_FOLDER_NAME)
- # Load the textbook yaml for this site
- if not op.exists(PATH_TOC_YAML):
- _error("No toc.yml file found, please create one")
- with open(PATH_TOC_YAML, 'r') as ff:
- toc = yaml.load(ff.read())
- # Drop divider items and non-linked pages in the sidebar, un-nest sections
- toc = _prepare_toc(toc)
- ###############################################################################
- # Generating the Jekyll files for all content
- n_skipped_files = 0
- n_built_files = 0
- case_check = _case_sensitive_fs(BUILD_FOLDER) and args.local_build
- print("Convert and copy notebook/md files...")
- for ix_file, page in enumerate(tqdm(list(toc))):
- url_page = page.get('url', None)
- title = page.get('title', None)
- # Make sure URLs (file paths) have correct structure
- _check_url_page(url_page, CONTENT_FOLDER_NAME)
- ###############################################################################
- # Create path to old/new file and create directory
- # URL will be relative to the CONTENT_FOLDER
- path_url_page = os.path.join(PATH_CONTENT_FOLDER, url_page.lstrip('/'))
- path_url_folder = os.path.dirname(path_url_page)
- # URLs shouldn't have the suffix in there already so now we find which one to add
- for suf in SUPPORTED_FILE_SUFFIXES:
- if op.exists(path_url_page + suf):
- path_url_page = path_url_page + suf
- break
- if not op.exists(path_url_page):
- raise _error("Could not find file called {} with any of these extensions: {}".format(path_url_page, SUPPORTED_FILE_SUFFIXES))
- # Create and check new folder / file paths
- path_new_folder = path_url_folder.replace(os.sep + CONTENT_FOLDER_NAME, os.sep + BUILD_FOLDER_NAME)
- path_new_file = op.join(path_new_folder, op.basename(path_url_page).replace('.ipynb', '.md'))
- if overwrite is False and op.exists(path_new_file) \
- and os.stat(path_new_file).st_mtime > os.stat(path_url_page).st_mtime:
- n_skipped_files += 1
- continue
- if not op.isdir(path_new_folder):
- os.makedirs(path_new_folder)
- ###############################################################################
- # Generate previous/next page URLs
- if ix_file == 0:
- url_prev_page = ''
- prev_file_title = ''
- else:
- prev_file_title = toc[ix_file-1].get('title')
- url_prev_page = toc[ix_file-1].get('url')
- url_prev_page = _prepare_url(url_prev_page)
- if ix_file == len(toc) - 1:
- url_next_page = ''
- next_file_title = ''
- else:
- next_file_title = toc[ix_file+1].get('title')
- url_next_page = toc[ix_file+1].get('url')
- url_next_page = _prepare_url(url_next_page)
- ###############################################################################
- # Content conversion
- # Convert notebooks or just copy md if no notebook.
- if path_url_page.endswith('.ipynb'):
- # Create a temporary version of the notebook we can modify
- tmp_notebook = path_url_page + '_TMP'
- sh.copy2(path_url_page, tmp_notebook)
- ###############################################################################
- # Notebook cleaning
- # Clean up the file before converting
- cleaner = NotebookCleaner(tmp_notebook)
- cleaner.remove_cells(empty=True)
- if site_yaml.get('hide_cell_text', False):
- cleaner.remove_cells(search_text=site_yaml.get('hide_cell_text'))
- if site_yaml.get('hide_code_text', False):
- cleaner.clear(kind="content", search_text=site_yaml.get('hide_code_text'))
- cleaner.clear('stderr')
- cleaner.save(tmp_notebook)
- _clean_notebook_cells(tmp_notebook)
- ###############################################################################
- # Conversion to Jekyll Markdown
- # Run nbconvert moving it to the output folder
- # This is the output directory for `.md` files
- build_call = '--FilesWriter.build_directory={}'.format(path_new_folder)
- # Copy notebook output images to the build directory using the base folder name
- path_after_build_folder = path_new_folder.split(os.sep + BUILD_FOLDER_NAME + os.sep)[-1]
- nb_output_folder = op.join(PATH_IMAGES_FOLDER, path_after_build_folder)
- images_call = '--NbConvertApp.output_files_dir={}'.format(nb_output_folder)
- call = ['jupyter', 'nbconvert', '--log-level="CRITICAL"',
- '--to', 'markdown', '--template', PATH_TEMPLATE,
- images_call, build_call, tmp_notebook]
- if execute is True:
- call.insert(-1, '--execute')
- check_call(call)
- os.remove(tmp_notebook)
- elif path_url_page.endswith('.md'):
- # If a non-notebook file, just copy it over.
- # If markdown we'll add frontmatter later
- sh.copy2(path_url_page, path_new_file)
- else:
- raise _error("Files must end in ipynb or md. Found file {}".format(path_url_page))
- ###############################################################################
- # Modify the generated Markdown to work with Jekyll
- # Clean markdown for Jekyll quirks (e.g. extra escape characters)
- with open(path_new_file, 'r') as ff:
- lines = ff.readlines()
- lines = _clean_lines(lines, path_new_file)
- # Split off original yaml
- yaml_orig, lines = _split_yaml(lines)
- # Front-matter YAML
- yaml_fm = []
- yaml_fm += ['---']
- # In case pre-existing links are sanitized
- sanitized = url_page.lower().replace('_', '-')
- if sanitized != url_page:
- if case_check and url_page.lower() == sanitized:
- raise RuntimeError(
- 'Redirect {} clashes with page {} for local build on '
- 'case-insensitive FS\n'.format(sanitized, url_page) +
- 'Rename source page to lower case or build on a case '
- 'sensitive FS, e.g. case-sensitive disk image on Mac')
- yaml_fm += ['redirect_from:']
- yaml_fm += [' - "{}"'.format(sanitized)]
- if ix_file == 0:
- if not sanitized != url_page:
- yaml_fm += ['redirect_from:']
- yaml_fm += [' - "/"']
- if path_url_page.endswith('.ipynb'):
- interact_path = 'content/' + path_url_page.split('content/')[-1]
- yaml_fm += ['interact_link: {}'.format(interact_path)]
- yaml_fm += ["title: '{}'".format(title)]
- yaml_fm += ['prev_page:']
- yaml_fm += [' url: {}'.format(url_prev_page)]
- yaml_fm += [" title: '{}'".format(prev_file_title)]
- yaml_fm += ['next_page:']
- yaml_fm += [' url: {}'.format(url_next_page)]
- yaml_fm += [" title: '{}'".format(next_file_title)]
- # Add back any original YaML, and end markers
- yaml_fm += yaml_orig
- yaml_fm += ['comment: "***PROGRAMMATICALLY GENERATED, DO NOT EDIT. SEE ORIGINAL FILES IN /{}***"'.format(CONTENT_FOLDER_NAME)]
- yaml_fm += ['---']
- yaml_fm = [ii + '\n' for ii in yaml_fm]
- lines = yaml_fm + lines
- # Write the result
- with open(path_new_file, 'w') as ff:
- ff.writelines(lines)
- n_built_files += 1
- ###############################################################################
- # Finishing up...
- # Copy non-markdown files in notebooks/ in case they're referenced in the notebooks
- print('Copying non-content files inside `{}/`...'.format(CONTENT_FOLDER_NAME))
- _copy_non_content_files()
- # Message at the end
- print("\n===========")
- print("Generated {} new files\nSkipped {} already-built files".format(n_built_files, n_skipped_files))
- if n_built_files == 0:
- print("Delete the markdown files in '{}' for any pages that you wish to re-build, or use --overwrite option to re-build all.".format(BUILD_FOLDER_NAME))
- print("\nYour Jupyter Book is now in `{}/`.".format(BUILD_FOLDER_NAME))
- print("\nDemo your Jupyter book with `make serve` or push to GitHub!")
- print('===========\n')
|