- from subprocess import check_call
- import os
- import os.path as op
- import sys
- import shutil as sh
- import yaml
- from nbclean import NotebookCleaner
- from tqdm import tqdm
- import numpy as np
- from glob import glob
- from uuid import uuid4
- import argparse
# Summary text handed to argparse; shown at the top of ``--help`` output.
DESCRIPTION = (
    "Convert a collection of Jupyter Notebooks into Jekyll "
    "markdown suitable for a course textbook."
)

# Add the ``scripts`` directory that sits next to this file to the import
# path before importing the project helpers (presumably that is where the
# ``jupyterbook`` package is expected to be found -- TODO confirm).
this_folder = op.dirname(op.abspath(__file__))
sys.path.append(op.join(this_folder, 'scripts'))

from jupyterbook.utils import (  # noqa: E402 -- must follow the sys.path tweak
    _split_yaml,
    _check_url_page,
    _prepare_toc,
    _prepare_url,
    _clean_notebook_cells,
    _error,
)
# Command-line interface. Path options default to None; the script entry
# point substitutes locations derived from the site root when they are unset.
parser = argparse.ArgumentParser(description=DESCRIPTION)
parser.add_argument(
    "--site-root",
    help="Path to the root of the textbook repository.",
    default=None,
)
parser.add_argument(
    "--path-template",
    help="Path to the template nbconvert uses to build markdown files",
    default=None,
)
parser.add_argument(
    "--path-config",
    help="Path to the Jekyll configuration file",
    default=None,
)
parser.add_argument(
    "--path-toc",
    help="Path to the Table of Contents YAML file",
    default=None,
)
parser.add_argument(
    "--overwrite",
    help="Overwrite md files if they already exist.",
    action='store_true',
)
parser.add_argument(
    "--execute",
    help="Execute notebooks before converting to MD.",
    action='store_true',
)
parser.add_argument(
    "--local-build",
    help="Specify you are building site locally for later upload.",
    action='store_true',
)
parser.set_defaults(overwrite=False, execute=False)

# Name of the folder that receives the generated markdown.
BUILD_FOLDER_NAME = "_build"
# File extensions that are treated as book pages (everything else is
# copied verbatim by ``_copy_non_content_files``).
SUPPORTED_FILE_SUFFIXES = ['.ipynb', '.md']
def _clean_lines(lines, filepath):
    """Replace images with jekyll image root and add escape chars as needed.

    Reads the module-level ``PATH_SITE_ROOT`` and ``PATH_IMAGES_FOLDER``.

    Parameters
    ----------
    lines : list of str
        Lines of the markdown page. The list is modified in place.
    filepath : str
        Path of the markdown file the lines belong to; relative paths are
        computed against its parent directory.

    Returns
    -------
    list of str
        The same ``lines`` object, with every entry rewritten.
    """
    inline_replace_chars = ['#']
    page_folder = op.dirname(filepath)
    path_rel_root = op.relpath(PATH_SITE_ROOT, page_folder)
    path_rel_root_one_up = path_rel_root.replace('../', '', 1)
    # Loop-invariant: relative location of the images folder from this page.
    path_rel_images = op.relpath(PATH_IMAGES_FOLDER, page_folder)

    for ix_line, line in enumerate(lines):
        # A relative path that climbs all the way to the site root is
        # shortened by one level.
        if path_rel_root in line:
            line = line.replace(path_rel_root, path_rel_root_one_up)
        # Absolute image-folder paths become paths relative to the page.
        line = line.replace(PATH_IMAGES_FOLDER, path_rel_images)

        # Escape inline characters only when the line holds several `$`
        # signs and no two of them sit directly next to each other.
        # The gaps are taken between *consecutive* dollar signs; the earlier
        # code measured every position against the first dollar only, so an
        # adjacent `$$` pair later in the line went unnoticed.
        # NOTE(review): `> 2` requires at least three dollar signs, so a line
        # with a single `$...$` span is never escaped -- confirm intent.
        dollars = [ix for ix, char in enumerate(line) if char == '$']
        gaps = [after - before for before, after in zip(dollars, dollars[1:])]
        if len(dollars) > 2 and all(gap > 1 for gap in gaps):
            for char in inline_replace_chars:
                line = line.replace('\\{}'.format(char), '\\\\{}'.format(char))
        line = line.replace(' \\$', ' \\\\$')
        lines[ix_line] = line
    return lines
def _copy_non_content_files():
    """Copy non-markdown/notebook files in the content folder into build folder so relative links work.

    Reads the module-level ``PATH_CONTENT_FOLDER``, ``CONTENT_FOLDER_NAME``,
    ``BUILD_FOLDER_NAME`` and ``SUPPORTED_FILE_SUFFIXES``. Every regular file
    below the content folder whose name does not end in a supported suffix is
    copied (with metadata, via ``shutil.copy2``) to the same relative
    location below the build folder; missing directories are created.
    """
    content_suffix = os.sep + CONTENT_FOLDER_NAME
    build_suffix = os.sep + BUILD_FOLDER_NAME

    # Swap the content folder for the build folder only at the END of the
    # content root. A plain str.replace over the whole path would also
    # rewrite any ancestor directory whose name starts with the content
    # folder name (e.g. ".../content_site/content/...").
    content_root = PATH_CONTENT_FOLDER.rstrip(os.sep)
    if content_root.endswith(content_suffix):
        build_root = content_root[:-len(content_suffix)] + build_suffix
    else:
        # Content root does not end in the configured name; keep the
        # textual substitution as a fallback.
        build_root = None

    skip_suffixes = tuple(SUPPORTED_FILE_SUFFIXES)
    pattern = op.join(PATH_CONTENT_FOLDER, '**', '*')
    for src in glob(pattern, recursive=True):
        if op.isdir(src) or src.endswith(skip_suffixes):
            continue
        if build_root is None:
            dest = src.replace(content_suffix, build_suffix)
        else:
            dest = op.join(build_root, op.relpath(src, content_root))
        os.makedirs(op.dirname(dest), exist_ok=True)
        sh.copy2(src, dest)
def _case_sensitive_fs(path):
    """True when filesystem at `path` is case sensitive, False otherwise.

    Checks this by attempting to write two files, one w/ upper case, one
    with lower. If after this only one file exists, the system is
    case-insensitive.

    Makes directory `path` if it does not exist. The probe files are removed
    again before returning, including when writing one of them fails.

    Parameters
    ----------
    path : str
        Directory in which to run the probe.

    Returns
    -------
    bool
        True if both probe files ended up as distinct directory entries.
    """
    os.makedirs(path, exist_ok=True)
    # Random stem so the probe cannot collide with real files in `path`.
    stem = uuid4().hex
    fnames = [op.join(path, stem + suffix) for suffix in 'aA']
    try:
        for fname in fnames:
            with open(fname, 'wt') as fobj:
                fobj.write('text')
    finally:
        # Count what actually landed on disk by listing the directory.
        # This is done in `finally` so a failed write still cleans up (and
        # the original error propagates), and by plain prefix match rather
        # than glob so metacharacters such as "[" in `path` cannot hide the
        # probe files.
        written = [name for name in os.listdir(path) if name.startswith(stem)]
        for name in written:
            os.unlink(op.join(path, name))
    return len(written) == 2
if __name__ == '__main__':
    # Script entry point: read the site configuration and table of contents,
    # convert every listed notebook / markdown page into Jekyll markdown in
    # the build folder, then copy the remaining content files across.
    # The upper-case names assigned here are module globals on purpose:
    # `_clean_lines` and `_copy_non_content_files` read them.
    args = parser.parse_args()
    overwrite = bool(args.overwrite)
    execute = bool(args.execute)

    if args.site_root is None:
        args.site_root = op.join(op.dirname(op.abspath(__file__)), '..')

    # Paths for the site, falling back to locations under the site root.
    PATH_SITE_ROOT = op.abspath(args.site_root)
    PATH_TOC_YAML = args.path_toc if args.path_toc is not None else op.join(PATH_SITE_ROOT, '_data', 'toc.yml')
    CONFIG_FILE = args.path_config if args.path_config is not None else op.join(PATH_SITE_ROOT, '_config.yml')
    PATH_TEMPLATE = args.path_template if args.path_template is not None else op.join(PATH_SITE_ROOT, 'scripts', 'templates', 'jekyllmd.tpl')
    PATH_IMAGES_FOLDER = op.join(PATH_SITE_ROOT, '_build', 'images')
    # Previously referenced but never assigned (NameError at first use).
    BUILD_FOLDER = op.join(PATH_SITE_ROOT, BUILD_FOLDER_NAME)

    # Load the site configuration. safe_load is used because these are plain
    # configuration files and newer PyYAML releases refuse `yaml.load`
    # without an explicit Loader.
    with open(CONFIG_FILE, 'r') as ff:
        site_yaml = yaml.safe_load(ff.read())
    content_folder_name = site_yaml.get('content_folder_name')
    if not content_folder_name:
        raise _error("No `content_folder_name` found in {}".format(CONFIG_FILE))
    CONTENT_FOLDER_NAME = content_folder_name.strip('/')
    # Previously referenced but never assigned (NameError at first use).
    PATH_CONTENT_FOLDER = op.join(PATH_SITE_ROOT, CONTENT_FOLDER_NAME)

    # Load the table of contents. `_error` is raised here the same way as at
    # the other call sites in this script.
    if not op.exists(PATH_TOC_YAML):
        raise _error("No toc.yml file found, please create one")

    with open(PATH_TOC_YAML, 'r') as ff:
        toc = yaml.safe_load(ff.read())

    toc = _prepare_toc(toc)

    n_skipped_files = 0
    n_built_files = 0
    # The redirect clash tested further down can only happen when the build
    # folder lives on a case-INsensitive filesystem (see the error text), so
    # the probe result has to be negated. The probe is always run because it
    # also creates the build folder.
    fs_is_case_sensitive = _case_sensitive_fs(BUILD_FOLDER)
    case_check = bool(args.local_build) and not fs_is_case_sensitive

    print("Convert and copy notebook/md files...")
    for ix_file, page in enumerate(tqdm(list(toc))):
        url_page = page.get('url', None)
        title = page.get('title', None)

        _check_url_page(url_page, CONTENT_FOLDER_NAME)

        # URLs are relative to the content folder and carry no file suffix,
        # so try each supported suffix until a file is found.
        path_url_page = os.path.join(PATH_CONTENT_FOLDER, url_page.lstrip('/'))
        path_url_folder = os.path.dirname(path_url_page)
        for suf in SUPPORTED_FILE_SUFFIXES:
            if op.exists(path_url_page + suf):
                path_url_page = path_url_page + suf
                break

        if not op.exists(path_url_page):
            raise _error("Could not find file called {} with any of these extensions: {}".format(path_url_page, SUPPORTED_FILE_SUFFIXES))

        # Mirror the page's folder below the build folder. This is computed
        # from the relative path rather than by text substitution so that
        # ancestor directories whose names resemble the content folder are
        # left alone.
        path_new_folder = op.normpath(op.join(
            BUILD_FOLDER, op.relpath(path_url_folder, PATH_CONTENT_FOLDER)))
        path_new_file = op.join(path_new_folder, op.basename(path_url_page).replace('.ipynb', '.md'))

        # Skip pages whose built file is newer than the source.
        if not overwrite and op.exists(path_new_file) \
                and os.stat(path_new_file).st_mtime > os.stat(path_url_page).st_mtime:
            n_skipped_files += 1
            continue

        if not op.isdir(path_new_folder):
            os.makedirs(path_new_folder)

        # Neighbouring pages, used for the prev/next links in the front matter.
        if ix_file == 0:
            url_prev_page = ''
            prev_file_title = ''
        else:
            prev_file_title = toc[ix_file-1].get('title')
            url_prev_page = toc[ix_file-1].get('url')
            url_prev_page = _prepare_url(url_prev_page)

        if ix_file == len(toc) - 1:
            url_next_page = ''
            next_file_title = ''
        else:
            next_file_title = toc[ix_file+1].get('title')
            url_next_page = toc[ix_file+1].get('url')
            url_next_page = _prepare_url(url_next_page)

        # Convert notebooks with nbconvert; markdown pages are copied as-is.
        if path_url_page.endswith('.ipynb'):
            # Work on a temporary copy so the source notebook is untouched.
            tmp_notebook = path_url_page + '_TMP'
            sh.copy2(path_url_page, tmp_notebook)
            try:
                cleaner = NotebookCleaner(tmp_notebook)
                cleaner.remove_cells(empty=True)
                if site_yaml.get('hide_cell_text', False):
                    cleaner.remove_cells(search_text=site_yaml.get('hide_cell_text'))
                if site_yaml.get('hide_code_text', False):
                    cleaner.clear(kind="content", search_text=site_yaml.get('hide_code_text'))
                cleaner.clear('stderr')
                cleaner.save(tmp_notebook)
                _clean_notebook_cells(tmp_notebook)

                # Output directory for the markdown file ...
                build_call = '--FilesWriter.build_directory={}'.format(path_new_folder)
                # ... and for the images nbconvert extracts from the notebook.
                path_after_build_folder = path_new_folder.split(os.sep + BUILD_FOLDER_NAME + os.sep)[-1]
                nb_output_folder = op.join(PATH_IMAGES_FOLDER, path_after_build_folder)
                images_call = '--NbConvertApp.output_files_dir={}'.format(nb_output_folder)
                call = ['jupyter', 'nbconvert', '--log-level="CRITICAL"',
                        '--to', 'markdown', '--template', PATH_TEMPLATE,
                        images_call, build_call, tmp_notebook]
                if execute:
                    # Keep the notebook path as the last argument.
                    call.insert(-1, '--execute')
                check_call(call)
            finally:
                # Always drop the temporary copy: a leftover `*_TMP` file in
                # the content folder would be treated as a non-content file
                # and copied into the build on later runs.
                if op.exists(tmp_notebook):
                    os.remove(tmp_notebook)
        elif path_url_page.endswith('.md'):
            sh.copy2(path_url_page, path_new_file)
        else:
            raise _error("Files must end in ipynb or md. Found file {}".format(path_url_page))

        # Post-process the generated markdown for Jekyll.
        with open(path_new_file, 'r') as ff:
            lines = ff.readlines()
        lines = _clean_lines(lines, path_new_file)

        # Separate any front matter the page already had; it is re-appended
        # after the generated entries below.
        yaml_orig, lines = _split_yaml(lines)

        yaml_fm = []
        yaml_fm += ['---']
        # Redirect from the lower-case / dashed form of the URL if it differs.
        sanitized = url_page.lower().replace('_', '-')
        if sanitized != url_page:
            # URLs that differ only by case map to the same file on a
            # case-insensitive filesystem.
            if case_check and url_page.lower() == sanitized:
                raise RuntimeError(
                    'Redirect {} clashes with page {} for local build on '
                    'case-insensitive FS\n'.format(sanitized, url_page) +
                    'Rename source page to lower case or build on a case '
                    'sensitive FS, e.g. case-sensitive disk image on Mac')
            yaml_fm += ['redirect_from:']
            yaml_fm += [' - "{}"'.format(sanitized)]
        if ix_file == 0:
            # The first page also answers at the site root; only open a new
            # `redirect_from:` list if one was not started above.
            if sanitized == url_page:
                yaml_fm += ['redirect_from:']
            yaml_fm += [' - "/"']
        if path_url_page.endswith('.ipynb'):
            # NOTE(review): 'content/' is hard-coded here rather than taken
            # from CONTENT_FOLDER_NAME -- confirm this is intended.
            interact_path = 'content/' + path_url_page.split('content/')[-1]
            yaml_fm += ['interact_link: {}'.format(interact_path)]
        yaml_fm += ["title: '{}'".format(title)]
        yaml_fm += ['prev_page:']
        yaml_fm += [' url: {}'.format(url_prev_page)]
        yaml_fm += [" title: '{}'".format(prev_file_title)]
        yaml_fm += ['next_page:']
        yaml_fm += [' url: {}'.format(url_next_page)]
        yaml_fm += [" title: '{}'".format(next_file_title)]
        yaml_fm += yaml_orig
        yaml_fm += ['---']
        yaml_fm = [entry + '\n' for entry in yaml_fm]
        lines = yaml_fm + lines

        with open(path_new_file, 'w') as ff:
            ff.writelines(lines)
        n_built_files += 1

    # Copy everything that is not a page so relative links keep working.
    print('Copying non-content files inside `{}/`...'.format(CONTENT_FOLDER_NAME))
    _copy_non_content_files()

    # Summary.
    print("\n===========")
    print("Generated {} new files\nSkipped {} already-built files".format(n_built_files, n_skipped_files))
    if n_built_files == 0:
        print("Delete the markdown files in '{}' for any pages that you wish to re-build, or use --overwrite option to re-build all.".format(BUILD_FOLDER_NAME))
    print("\nYour Jupyter Book is now in `{}/`.".format(BUILD_FOLDER_NAME))
    print("\nDemo your Jupyter book with `make serve` or push to GitHub!")
    print('===========\n')