Module pdoc.html_helpers
Helper functions for HTML output.
Expand source code Browse git
"""
Helper functions for HTML output.
"""
import inspect
import os
import re
import subprocess
import textwrap
import traceback
from contextlib import contextmanager
from functools import partial, lru_cache
from typing import Callable, Match
from warnings import warn
import xml.etree.ElementTree as etree
import markdown
from markdown.inlinepatterns import InlineProcessor
from markdown.util import AtomicString
import pdoc
@lru_cache()
def minify_css(css: str,
               _whitespace=partial(re.compile(r'\s*([,{:;}])\s*').sub, r'\1'),
               _comments=partial(re.compile(r'/\*.*?\*/', flags=re.DOTALL).sub, ''),
               _trailing_semicolon=partial(re.compile(r';\s*}').sub, '}')):
    """
    Return a compacted copy of the stylesheet `css`.

    Three passes are applied, in this order:

    1. `/* ... */` comments are dropped (they may span lines),
    2. whitespace around any of `, { : ; }` is removed,
    3. a semicolon directly preceding a closing brace is removed.

    The result is finally stripped of leading/trailing whitespace.
    The underscore-prefixed parameters are pre-compiled helpers bound
    at definition time and are not meant to be passed by callers.
    Results are memoized by `lru_cache`.
    """
    without_comments = _comments(css)
    tightened = _whitespace(without_comments)
    return _trailing_semicolon(tightened).strip()
def minify_html(html: str,
                _minify=partial(
                    re.compile(r'(.*?)(<pre\b.*?</pre\b\s*>)|(.*)', re.IGNORECASE | re.DOTALL).sub,
                    lambda found, _squeeze=partial(re.compile(r'\s\s+').sub, '\n'): ''.join((
                        _squeeze(found.group(1) or ''),
                        found.group(2) or '',
                        _squeeze(found.group(3) or ''))))):
    """
    Return `html` with whitespace runs collapsed everywhere except
    inside `<pre>` elements.

    The document is walked as alternating "text before a `<pre>` block"
    / "the `<pre>` block itself" pairs, followed by the trailing text.
    In the non-`<pre>` parts every run of two or more whitespace
    characters is replaced by a single newline; `<pre>...</pre>`
    spans are emitted verbatim. `_minify` is a pre-built helper bound at
    definition time and is not meant to be passed by callers.
    """
    return _minify(html)
def glimpse(text: str, max_length=153, *, paragraph=True,
            _split_paragraph=partial(re.compile(r'\s*\n\s*\n\s*').split, maxsplit=1),
            _trim_last_word=partial(re.compile(r'\S+$').sub, ''),
            _remove_titles=partial(re.compile(r'^(#+|-{4,}|={4,})', re.MULTILINE).sub, ' ')):
    """
    Return a short excerpt of `text`.

    With `paragraph` set (the default), only the first paragraph (text up
    to the first blank line) is kept; if more paragraphs follow, a
    trailing period is replaced by an ellipsis. Markdown title markers
    (leading `#`s and `----`/`====` underlines) are blanked out.

    In either mode the excerpt is cut down to fit `max_length` characters:
    it is truncated, any partial last word is dropped, and an ellipsis is
    appended unless (in paragraph mode) the cut happens to end on a period.
    """
    excerpt = text.lstrip()

    if paragraph:
        first, *remainder = _split_paragraph(excerpt)
        # More paragraphs follow: signal the omission with an ellipsis
        excerpt = (first.rstrip('.') + ' …') if remainder else first
        excerpt = _remove_titles(excerpt).strip()

    if len(excerpt) <= max_length:
        return excerpt

    excerpt = _trim_last_word(excerpt[:max_length - 2])
    if paragraph and excerpt.endswith('.'):
        # Cut landed on a sentence end; no ellipsis needed
        return excerpt
    return excerpt.rstrip('. ') + ' …'
# Shared Python-Markdown converter used by `to_html()` and `extract_toc()`.
# Callers `.reset()` it before every conversion.
_md = markdown.Markdown(
    output_format='html5',  # type: ignore[arg-type]
    extensions=[
        f"markdown.extensions.{extension}"
        for extension in (
            "abbr",
            "attr_list",
            "def_list",
            "fenced_code",
            "footnotes",
            "tables",
            "admonition",
            "smarty",
            "toc",
        )
    ],
    extension_configs={
        # Only typographic dashes and ellipses; quotes are left untouched
        "markdown.extensions.smarty": {
            'smart_dashes': True,
            'smart_ellipses': True,
            'smart_quotes': False,
            'smart_angled_quotes': False,
        },
    },
)
@contextmanager
def _fenced_code_blocks_hidden(text):
    """
    Context manager that temporarily swaps fenced code blocks
    (delimited by three or more backticks or tildes) in `text` for
    opaque `@<hash>@` placeholders.

    Yields a one-element list holding the text with blocks hidden. The
    caller reads and writes the working text through element 0; when the
    `with` body finishes, the placeholders in element 0 are replaced by
    the original code blocks again.
    """
    fence_re = re.compile(r'^(?P<fence>```+|~~~+).*\n'
                          r'(?:.*\n)*?'
                          r'^(?P=fence)[ ]*(?!.)', re.MULTILINE)
    stash = {}

    def stash_block(found):
        block = found.group()
        placeholder = f'@{hash(block)}@'
        stash[placeholder] = block
        return placeholder

    # Via a manager object (a list) so modifications can pass back and forth as result[0]
    result = [fence_re.sub(stash_block, text)]
    yield result

    restored = result[0]
    for placeholder, block in stash.items():
        restored = restored.replace(placeholder, block)
    result[0] = restored
class _ToMarkdown:
    """
    This class serves as a namespace for methods converting common
    documentation formats into markdown our Python-Markdown with
    addons can ingest.

    If debugging regexs (I can't imagine why that would be necessary
    — they are all perfect!) an insta-preview tool such as RegEx101.com
    will come in handy.
    """
    @staticmethod
    def _deflist(name, type, desc):
        """
        Returns `name`, `type`, and `desc` formatted as a
        Python-Markdown definition list entry. See also:
        https://python-markdown.github.io/extensions/definition_lists/

        At least one of `name` and `type` must be non-empty, and any
        continuation lines in `desc` must already be indented by at
        least four spaces (see `_fix_indent`).
        """
        # Wrap any identifiers and string literals in parameter type spec
        # in backticks while skipping common "stopwords" such as 'or', 'of',
        # 'optional' ... See §4 Parameters:
        # https://numpydoc.readthedocs.io/en/latest/format.html#sections
        type_parts = re.split(r'( *(?: of | or |, *default(?:=|\b)|, *optional\b) *)', type or '')
        # Even indices are the text between stopwords; odd ones the stopwords themselves
        type_parts[::2] = [f'`{s}`' if s else s
                           for s in type_parts[::2]]
        type = ''.join(type_parts)

        desc = desc or ' '
        assert _ToMarkdown._is_indented_4_spaces(desc)
        assert name or type
        ret = ""
        if name:
            # NOTE: Triple-backtick argument names so we skip linkifying them
            ret += f"**```{name.replace(', ', '```**, **```')}```**"
        if type:
            ret += f' : {type}' if ret else type
        ret += f'\n: {desc}\n\n'
        return ret

    @staticmethod
    def _numpy_params(match):
        """ Converts NumpyDoc parameter (etc.) sections into Markdown. """
        name, type, desc = match.group("name", "type", "desc")
        # The 'just_type' group only exists in the Returns/Yields/... pattern
        type = type or match.groupdict().get('just_type', None)
        desc = desc.strip()
        return _ToMarkdown._deflist(name, type, desc)

    @staticmethod
    def _numpy_seealso(match):
        """
        Converts NumpyDoc "See Also" section either into referenced code,
        optionally within a definition list.
        """
        spec_with_desc, simple_list = match.groups()
        if spec_with_desc:
            spec_desc_strings = []
            for line in filter(None, spec_with_desc.split('\n')):
                spec, desc = map(str.strip, line.split(':', 1))
                spec_desc_strings.append(f'`{spec}`\n: {desc}')
            return '\n\n'.join(spec_desc_strings)
        return ', '.join(f'`{i}`' for i in simple_list.split(', '))

    @staticmethod
    def _numpy_sections(match):
        """
        Convert sections with parameter, return, and see also lists to Markdown
        lists.
        """
        section, body = match.groups()
        section = section.title()
        if section == 'See Also':
            body = re.sub(r'\n\s{4}\s*', ' ', body)  # Handle line continuation
            body = re.sub(r'^((?:\n?[\w.]* ?: .*)+)|(.*\w.*)',
                          _ToMarkdown._numpy_seealso, body)
        elif section in ('Returns', 'Yields', 'Raises', 'Warns'):
            body = re.sub(r'^(?:(?P<name>\*{0,2}\w+(?:, \*{0,2}\w+)*)'
                          r'(?: ?: (?P<type>.*))|'
                          r'(?P<just_type>\w[^\n`*]*))(?<!\.)$'
                          r'(?P<desc>(?:\n(?: {4}.*|$))*)',
                          _ToMarkdown._numpy_params, body, flags=re.MULTILINE)
        elif section in ('Parameters', 'Receives', 'Other Parameters',
                         'Arguments', 'Args', 'Attributes'):
            name = r'(?:\w|\{\w+(?:,\w+)+\})+'  # Support curly brace expansion
            body = re.sub(r'^(?P<name>\*{0,2}' + name + r'(?:, \*{0,2}' + name + r')*)'
                          r'(?: ?: (?P<type>.*))?(?<!\.)$'
                          r'(?P<desc>(?:\n(?: {4}.*|$))*)',
                          _ToMarkdown._numpy_params, body, flags=re.MULTILINE)
        return f'{section}\n-----\n{body}'

    @staticmethod
    def numpy(text):
        """
        Convert `text` in numpydoc docstring format to Markdown
        to be further converted later.
        """
        return re.sub(r'^(\w[\w ]+)\n-{3,}\n'
                      r'((?:(?!.+\n-+).*$\n?)*)',
                      _ToMarkdown._numpy_sections, text, flags=re.MULTILINE)

    @staticmethod
    def _is_indented_4_spaces(txt, _3_spaces_or_less=re.compile(r'\n\s{0,3}\S').search):
        """
        Return true if `txt` is a single line, or if none of its
        continuation lines start with fewer than four whitespace characters.
        """
        return '\n' not in txt or not _3_spaces_or_less(txt)

    @staticmethod
    def _fix_indent(name, type, desc):
        """Maybe fix indent from 2 to 4 spaces."""
        if not _ToMarkdown._is_indented_4_spaces(desc):
            # Add TWO spaces per continuation line so that a 2-space indent
            # becomes the 4 spaces `_deflist` asserts on.
            desc = desc.replace('\n', '\n  ')
        return name, type, desc

    @staticmethod
    def indent(indent, text, *, clean_first=False):
        """
        Prefix every line of `text` (right-stripped) with the string `indent`.
        If `clean_first`, `text` is first normalized with `inspect.cleandoc`.
        """
        if clean_first:
            text = inspect.cleandoc(text)
        return re.sub(r'\n', f'\n{indent}', indent + text.rstrip())

    @staticmethod
    def google(text):
        """
        Convert `text` in Google-style docstring format to Markdown
        to be further converted later.
        """
        def googledoc_sections(match):
            section, body = match.groups('')
            if not body:
                return match.group()
            body = textwrap.dedent(body)
            section = section.title()
            if section in ('Args', 'Attributes'):
                body = re.compile(
                    r'^([\w*]+)(?: \(([\w.,=\[\] -]+)\))?: '
                    r'((?:.*)(?:\n(?: {2,}.*|$))*)', re.MULTILINE).sub(
                    lambda m: _ToMarkdown._deflist(*_ToMarkdown._fix_indent(*m.groups())),
                    inspect.cleandoc(f'\n{body}')
                )
            elif section in ('Returns', 'Yields', 'Raises', 'Warns'):
                # The empty first group stands in for the (absent) name
                body = re.compile(
                    r'^()([\w.,\[\] ]+): '
                    r'((?:.*)(?:\n(?: {2,}.*|$))*)', re.MULTILINE).sub(
                    lambda m: _ToMarkdown._deflist(*_ToMarkdown._fix_indent(*m.groups())),
                    inspect.cleandoc(f'\n{body}')
                )
            # Convert into markdown sections. End underlines with '='
            # to avoid matching and re-processing as Numpy sections.
            return f'\n{section}\n-----=\n{body}'

        text = re.compile(r'^([A-Z]\w+):$\n'
                          r'((?:\n?(?: {2,}.*|$))+)', re.MULTILINE).sub(googledoc_sections, text)
        return text

    @staticmethod
    def _admonition(match, module=None, limit_types=None):
        """
        Regex substitution callback turning one matched reST directive
        (groups: indent, directive type, value, indented body) into
        Markdown. Raises `RuntimeError` if an `.. include::` fails.
        """
        indent, type, value, text = match.groups()

        if limit_types and type not in limit_types:
            return match.group(0)

        if type == 'include' and module:
            try:
                return _ToMarkdown._include_file(indent, value,
                                                 _ToMarkdown._directive_opts(text), module)
            except Exception as e:
                # Chain explicitly so the original failure stays attached as the cause
                raise RuntimeError(
                    f'`.. include:: {value}` error in module {module.name!r}: {e}') from e
        if type in ('image', 'figure'):
            alt_text = text.translate(str.maketrans({
                '\n': ' ',
                '[': '\\[',
                ']': '\\]'})).strip()
            return f'{indent}![{alt_text}]({value})\n'
        if type == 'math':
            return _ToMarkdown.indent(indent,
                                      f'\\[ {text.strip()} \\]',
                                      clean_first=True)

        if type == 'versionchanged':
            title = f'Changed in version: {value}'
        elif type == 'versionadded':
            title = f'Added in version: {value}'
        elif type == 'deprecated' and value:
            title = f'Deprecated since version: {value}'
        elif type == 'admonition':
            title = value
        elif type.lower() == 'todo':
            title = 'TODO'
            text = f'{value} {text}'
        else:
            title = type.capitalize()
            if value:
                title += f': {value}'

        # Python-Markdown only treats lines indented by four spaces (relative
        # to the `!!!` marker) as the admonition's body.
        text = _ToMarkdown.indent(indent + '    ', text, clean_first=True)
        return f'{indent}!!! {type} "{title}"\n{text}\n'

    @staticmethod
    def admonitions(text, module, limit_types=None):
        """
        Process reStructuredText's block directives such as
        `.. warning::`, `.. deprecated::`, `.. versionadded::`, etc.
        and turn them into Python-Markdown admonitions.

        `limit_types` is optionally a set of directives to limit processing to.

        See: https://python-markdown.github.io/extensions/admonition/
        """
        substitute = partial(re.compile(r'^(?P<indent> *)\.\. ?(\w+)::(?: *(.*))?'
                                        r'((?:\n(?:(?P=indent) +.*| *$))*)', re.MULTILINE).sub,
                             partial(_ToMarkdown._admonition, module=module,
                                     limit_types=limit_types))
        # Apply twice for nested (e.g. image inside warning)
        return substitute(substitute(text))

    @staticmethod
    def _include_file(indent: str, path: str, options: dict, module: 'pdoc.Module') -> str:
        """
        Return the contents of the file at `path` (resolved relative to the
        directory of `module`'s source file), indented by `indent`.

        `options` may contain the directive options `start-line`,
        `end-line`, `start-after` and `end-before` which narrow down the
        included portion. `str.index` raises `ValueError` if a
        `start-after`/`end-before` marker is not found.
        """
        start_line = int(options.get('start-line', 0))
        end_line = int(options.get('end-line', 0)) or None  # 0 / missing means "to the end"
        start_after = options.get('start-after')
        end_before = options.get('end-before')

        with open(os.path.normpath(os.path.join(os.path.dirname(module.obj.__file__), path)),
                  encoding='utf-8') as f:
            text = ''.join(list(f)[start_line:end_line])

        if start_after:
            text = text[text.index(start_after) + len(start_after):]
        if end_before:
            text = text[:text.index(end_before)]

        return _ToMarkdown.indent(indent, text)

    @staticmethod
    def _directive_opts(text: str) -> dict:
        """Parse `:name: value` option lines of a directive body into a dict."""
        return dict(re.findall(r'^ *:([^:]+): *(.*)', text, re.MULTILINE))

    # A line starting with '>>> ' plus all directly following non-empty lines
    DOCTESTS_RE = re.compile(r'^(?:>>> .*)(?:\n.+)*', re.MULTILINE)

    @staticmethod
    def doctests(text):
        """
        Fence non-fenced (`~~~`) top-level (0-indented)
        doctest blocks so they render as Python code.
        """
        text = _ToMarkdown.DOCTESTS_RE.sub(
            lambda match: f'```python-repl\n{match.group()}\n```\n', text)
        return text

    @staticmethod
    def raw_urls(text):
        """Wrap URLs in Python-Markdown-compatible <angle brackets>."""
        pattern = re.compile(r"""
            (?P<code_span>                   # matches whole code span
                (?<!`)(?P<fence>`+)(?!`)     # a string of backticks
                .*?
                (?<!`)(?P=fence)(?!`))
            |
            (?P<markdown_link>\[.*?\]\(.*\))  # matches whole inline link
            |
            (?<![<\"\'])                     # does not start with <, ", '
            (?P<url>(?:http|ftp)s?://        # url with protocol
                [^>\s()]+                    # url part before any (, )
                (?:\([^>\s)]*\))*            # optionally url part within parentheses
                [^>\s)]*                     # url part after any )
            )""", re.VERBOSE)

        # Code spans and inline links are matched only to be skipped unchanged
        text = pattern.sub(
            lambda m: (f'<{m.group("url")}>') if m.group('url') else m.group(), text)
        return text
class _MathPattern(InlineProcessor):
    """
    Inline processor wrapping LaTeX math, written as `\\(...\\)` (inline)
    or `\\[...\\]` / `$$...$$` (display), into a `<span>` holding a
    `MathJax_Preview` span and a `<script type="math/tex">` element.
    """
    NAME = 'pdoc-math'
    PATTERN = r'(?<!\S|\\)(?:\\\((.+?)\\\)|\\\[(.+?)\\\]|\$\$(.+?)\$\$)'
    PRIORITY = 181  # Larger than that of 'escape' pattern

    def handleMatch(self, m, data):
        # The three capture groups correspond to \(..\), \[..\] and $$..$$;
        # stop at the first one that participated in the match.
        display_modes = (False, True, True)
        for value, is_block in zip(m.groups(), display_modes):
            if value:
                break

        mime_type = 'math/tex' + ('; mode=display' if is_block else '')
        tex = AtomicString(value)

        preview = etree.Element('span', {'class': 'MathJax_Preview'})
        preview.text = tex
        script = etree.Element('script', type=mime_type)
        script.text = tex

        wrapper = etree.Element('span')
        wrapper.append(preview)
        wrapper.append(script)
        return wrapper, m.start(0), m.end(0)
def to_html(text: str, *,
            docformat: str = None,
            module: pdoc.Module = None, link: Callable[..., str] = None,
            latex_math: bool = False):
    """
    Convert the docstring `text`, written in `docformat`, to HTML.

    The text is first converted with `to_markdown()` (which see for the
    meaning of `docformat`, `module` and `link`) and then rendered
    by the shared Python-Markdown instance.

    If `latex_math` is true, the math inline processor is registered on
    the shared Markdown instance before rendering; otherwise it is
    deregistered if currently present.
    """
    # Optionally register our math syntax processor
    inline_patterns = _md.inlinePatterns
    math_registered = _MathPattern.NAME in inline_patterns
    if latex_math and not math_registered:
        inline_patterns.register(_MathPattern(_MathPattern.PATTERN),
                                 _MathPattern.NAME,
                                 _MathPattern.PRIORITY)
    elif math_registered and not latex_math:
        inline_patterns.deregister(_MathPattern.NAME)

    markdown_text = to_markdown(text, docformat=docformat, module=module, link=link)
    return _md.reset().convert(markdown_text)
def to_markdown(text: str, *,
                docformat: str = None,
                module: pdoc.Module = None, link: Callable[..., str] = None):
    """
    Returns `text`, assumed to be a docstring in `docformat`, converted to markdown.
    `__docformat__` is respected
    if present, otherwise Numpydoc and Google-style docstrings are assumed,
    as well as pure Markdown.

    `module` should be the documented module (so the references can be
    resolved) and `link` is the hyperlinking function like the one in the
    example template.

    An unsupported `docformat` triggers a warning and falls back to
    processing both numpy and google styles. Fenced code blocks are left
    untouched by all conversions.
    """
    if not docformat:
        docformat = str(getattr(getattr(module, 'obj', None), '__docformat__', 'numpy,google '))
        # Keep only the first whitespace-separated token, lowercased
        docformat, *_ = docformat.lower().split()
    if not (set(docformat.split(',')) & {'', 'numpy', 'google'}):
        warn(f'__docformat__ value {docformat!r} in module {module!r} not supported. '
             'Supported values are: numpy, google.')
        docformat = 'numpy,google'

    with _fenced_code_blocks_hidden(text) as result:
        text = result[0]

        text = _ToMarkdown.admonitions(text, module)

        if 'google' in docformat:
            text = _ToMarkdown.google(text)

        text = _ToMarkdown.doctests(text)
        text = _ToMarkdown.raw_urls(text)

        # If doing both, do numpy after google, otherwise google-style's
        # headings are incorrectly interpreted as numpy params
        if 'numpy' in docformat:
            text = _ToMarkdown.numpy(text)

        if module and link:
            # Hyperlink markdown code spans not within markdown hyperlinks.
            # E.g. `code` yes, but not [`code`](...). RE adapted from:
            # https://github.com/Python-Markdown/markdown/blob/ada40c66/markdown/inlinepatterns.py#L106
            # Also avoid linking triple-backticked arg names in deflists.
            linkify = partial(_linkify, link=link, module=module, wrap_code=True)

            def maybe_linkify(m):
                fence = m.group('fence')
                # `fence` is None when the match is a run of escaped
                # backslashes before a backtick (the pattern's first
                # alternative) rather than a code span; leave that as is.
                if m.group('inside_link') or fence is None or len(fence) > 2:
                    return m.group()
                return linkify(m)

            text = re.sub(r'(?P<inside_link>\[[^\]]*?)?'
                          r'(?:(?<!\\)(?:\\{2})+(?=`)|(?<!\\)(?P<fence>`+)'
                          r'(?P<code>.+?)(?<!`)'
                          r'(?P=fence)(?!`))',
                          maybe_linkify, text)

        result[0] = text
    text = result[0]

    return text
class ReferenceWarning(UserWarning):
    """
    This warning is raised in `to_html` when an object reference in markdown
    doesn't match any documented objects.

    Look for this warning to catch typos / references to obsolete symbols.
    """
def _linkify(match: Match, *, link: Callable[..., str], module: 'pdoc.Module', wrap_code=False):
    """
    Regex substitution callback that hyperlinks identifiers in a code span.

    `match` is a match whose `code` group (or, if the pattern defines no
    such group, the whole match) is the code span's contents. Each dotted
    identifier in it is looked up with `module.find_ident()` and replaced
    by the result of `link()`. Text that doesn't look like a plain
    identifier / type annotation is returned unchanged.

    If `wrap_code` is true, the result is wrapped in an HTML `<code>`
    element with markdown-sensitive characters escaped.

    Emits `ReferenceWarning` for dotted references whose first part is
    a documented object but which can't be resolved as a whole.
    """
    try:
        code_span = match.group('code')
    except IndexError:
        # Pattern has no 'code' group at all
        code_span = match.group()
    if code_span is None:
        # The 'code' group exists but didn't participate in this match
        # (another alternative matched); nothing to linkify.
        return match.group()

    is_type_annotation = re.match(r'^[`\w\s.,\[\]()]+$', code_span)
    if not is_type_annotation:
        return match.group()

    def handle_refname(match):
        refname = match.group()
        dobj = module.find_ident(refname)
        if isinstance(dobj, pdoc.External):
            # If this is a single-word reference,
            # most likely an argument name. Skip linking External.
            if '.' not in refname:
                return refname
            # If refname in documentation has a typo or is obsolete, warn.
            # XXX: Assume at least the first part of refname, i.e. the package, is correct.
            module_part = module.find_ident(refname.split('.')[0])
            if not isinstance(module_part, pdoc.External):
                warn(f'Code reference `{refname}` in module "{module.refname}" does not match any '
                     'documented object.',
                     ReferenceWarning, stacklevel=3)
        return link(dobj)

    if wrap_code:
        code_span = code_span.replace('[', '\\[')
    linked = re.sub(r'[a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*(?:\(\))?', handle_refname, code_span)
    if wrap_code:
        # Wrapping in HTML <code> as opposed to backticks evaluates markdown */_ markers,
        # so let's escape them in text (but not in HTML tag attributes).
        # Backticks also cannot be used because html returned from `link()`
        # would then become escaped.
        # This finds overlapping matches, https://stackoverflow.com/a/5616910/1090455
        cleaned = re.sub(r'(_(?=[^>]*?(?:<|$)))', r'\\\1', linked)
        return f'<code>{cleaned}</code>'
    return linked
def extract_toc(text: str):
    """
    Render and return the HTML table of contents for the markdown
    titles found in `text`.

    Top-level doctest blocks are removed from the text beforehand
    (fenced code blocks are shielded from that removal). The text is
    rendered together with a `[TOC]` marker and a cut marker; only the
    HTML preceding the cut marker is returned.
    """
    with _fenced_code_blocks_hidden(text) as result:
        result[0] = _ToMarkdown.DOCTESTS_RE.sub('', result[0])
    text = result[0]

    cut_marker = '@CUT@'
    html = _md.reset().convert(f'[TOC]\n\n@CUT@\n\n{text}')
    toc, _rest = html.split(cut_marker, 1)

    paragraph_open = '<p>'
    if toc.endswith(paragraph_open):  # CUT was put into its own paragraph
        toc = toc[:-len(paragraph_open)].rstrip()
    return toc
def format_git_link(template: str, dobj: pdoc.Doc):
    """
    Interpolate `template` as a formatted string literal using values extracted
    from `dobj` and the working environment.

    The template is expanded with `str.format` and may reference any local
    variable of this function: `path` (project-relative source path, always
    with forward slashes), `abs_path`, `start_line`, `end_line`, `lines`,
    `dobj`, `template`, and `commit`. `commit` is only computed (and hence
    only available) when the template actually contains a `commit` field.

    Returns the resulting URL, or None if `template` is empty or if anything
    goes wrong; in the latter case a warning with the traceback is issued
    instead of raising.
    """
    if not template:
        return None
    try:
        # NOTE: Local variable names below are part of the template
        # interface (see `locals()` further down); don't rename them.
        if 'commit' in _str_template_fields(template):
            commit = _git_head_commit()
        # Unwrap decorated objects so the path is that of the wrapped object
        abs_path = inspect.getfile(inspect.unwrap(dobj.obj))
        path = _project_relative_path(abs_path)

        # Urls should always use / instead of \\
        if os.name == 'nt':
            path = path.replace('\\', '/')

        lines, start_line = inspect.getsourcelines(dobj.obj)
        start_line = start_line or 1  # GH-296
        end_line = start_line + len(lines) - 1
        url = template.format(**locals())
        return url
    except Exception:
        # Best effort: a missing source link must not abort documentation generation
        warn(f'format_git_link for {dobj.obj} failed:\n{traceback.format_exc()}')
        return None
@lru_cache()
def _git_head_commit():
    """
    Return the hash of the git HEAD commit of the repository containing
    the current working directory, as printed by `git rev-parse HEAD`.

    If git can't be executed, or if the command fails (e.g. when not
    inside a git repository), a warning is issued and None is returned.
    The result is memoized by `lru_cache`.
    """
    process_args = ['git', 'rev-parse', 'HEAD']
    try:
        output = subprocess.check_output(process_args, universal_newlines=True)
    except OSError as error:
        warn(f"git executable not found on system:\n{error}")
        return None
    except subprocess.CalledProcessError as error:
        warn(
            "Ensure pdoc is run within a git repository.\n"
            f"`{' '.join(process_args)}` failed with output:\n{error.output}"
        )
        return None
    return output.strip()
@lru_cache()
def _git_project_root():
    """
    Return the normalized path of the project's root directory as
    reported by git, or None if it can't be determined.

    The superproject's working tree is asked for first; if that query
    fails or prints nothing, the repository's own top-level directory
    is used. The result is memoized by `lru_cache`.
    """
    queries = (
        ['git', 'rev-parse', '--show-superproject-working-tree'],
        ['git', 'rev-parse', '--show-toplevel'],
    )
    for cmd in queries:
        try:
            output = subprocess.check_output(cmd, universal_newlines=True)
        except (subprocess.CalledProcessError, OSError):
            continue
        path = output.rstrip('\r\n')
        if path:
            return os.path.normpath(path)
    return None
@lru_cache()
def _project_relative_path(absolute_path):
    """
    Convert an absolute path of a python source file to a project-relative path.
    Assumes the project's path is either the current working directory or
    Python library installation.

    The git project root (falling back to the current working directory)
    is tried first, then the interpreter's pure-Python library directory.
    Raises `RuntimeError` if `absolute_path` lies under neither.
    """
    # `distutils` (whose `get_python_lib()` was used here) was removed from
    # the standard library in Python 3.12; `sysconfig` is its replacement.
    import sysconfig

    for prefix_path in (_git_project_root() or os.getcwd(),
                        sysconfig.get_path('purelib')):
        try:
            common_path = os.path.commonpath([prefix_path, absolute_path])
        except ValueError:
            # No common path at all, e.g. paths on different Windows drives;
            # the file can't be a descendant of this prefix.
            continue
        if os.path.samefile(common_path, prefix_path):
            # absolute_path is a descendant of prefix_path
            return os.path.relpath(absolute_path, prefix_path)
    raise RuntimeError(
        f"absolute path {absolute_path!r} is not a descendant of the current working directory "
        "or of the system's python library."
    )
@lru_cache()
def _str_template_fields(template):
    """
    Return the list of replacement-field names appearing in the
    `str.format`-style string `template`, in order of appearance.

    Purely literal chunks of the template contribute nothing.
    The result is memoized by `lru_cache`.
    """
    from string import Formatter

    names = []
    for _literal, field_name, _format_spec, _conversion in Formatter().parse(template):
        if field_name is not None:
            names.append(field_name)
    return names
Functions
def extract_toc(text: str)
-
Returns HTML Table of Contents containing markdown titles in
`text`.
Expand source code Browse git
def extract_toc(text: str): """ Returns HTML Table of Contents containing markdown titles in `text`. """ with _fenced_code_blocks_hidden(text) as result: result[0] = _ToMarkdown.DOCTESTS_RE.sub('', result[0]) text = result[0] toc, _ = _md.reset().convert(f'[TOC]\n\n@CUT@\n\n{text}').split('@CUT@', 1) if toc.endswith('<p>'): # CUT was put into its own paragraph toc = toc[:-3].rstrip() return toc
def format_git_link(template: str, dobj: Doc)
-
Interpolate `template`
as a formatted string literal using values extracted from `dobj`
and the working environment.
Expand source code Browse git
def format_git_link(template: str, dobj: pdoc.Doc): """ Interpolate `template` as a formatted string literal using values extracted from `dobj` and the working environment. """ if not template: return None try: if 'commit' in _str_template_fields(template): commit = _git_head_commit() abs_path = inspect.getfile(inspect.unwrap(dobj.obj)) path = _project_relative_path(abs_path) # Urls should always use / instead of \\ if os.name == 'nt': path = path.replace('\\', '/') lines, start_line = inspect.getsourcelines(dobj.obj) start_line = start_line or 1 # GH-296 end_line = start_line + len(lines) - 1 url = template.format(**locals()) return url except Exception: warn(f'format_git_link for {dobj.obj} failed:\n{traceback.format_exc()}') return None
def glimpse(text: str, max_length=153, *, paragraph=True)
-
Returns a short excerpt (e.g. first paragraph) of text. If `paragraph`
is True, the first paragraph will be returned, but never longer than `max_length`
characters.
Expand source code Browse git
def glimpse(text: str, max_length=153, *, paragraph=True, _split_paragraph=partial(re.compile(r'\s*\n\s*\n\s*').split, maxsplit=1), _trim_last_word=partial(re.compile(r'\S+$').sub, ''), _remove_titles=partial(re.compile(r'^(#+|-{4,}|={4,})', re.MULTILINE).sub, ' ')): """ Returns a short excerpt (e.g. first paragraph) of text. If `paragraph` is True, the first paragraph will be returned, but never longer than `max_length` characters. """ text = text.lstrip() if paragraph: text, *rest = _split_paragraph(text) if rest: text = text.rstrip('.') text += ' …' text = _remove_titles(text).strip() if len(text) > max_length: text = _trim_last_word(text[:max_length - 2]) if not text.endswith('.') or not paragraph: text = text.rstrip('. ') + ' …' return text
def minify_css(css: str)
-
Minify CSS by removing extraneous whitespace, comments, and trailing semicolons.
Expand source code Browse git
@lru_cache() def minify_css(css: str, _whitespace=partial(re.compile(r'\s*([,{:;}])\s*').sub, r'\1'), _comments=partial(re.compile(r'/\*.*?\*/', flags=re.DOTALL).sub, ''), _trailing_semicolon=partial(re.compile(r';\s*}').sub, '}')): """ Minify CSS by removing extraneous whitespace, comments, and trailing semicolons. """ return _trailing_semicolon(_whitespace(_comments(css))).strip()
def minify_html(html: str)
-
Minify HTML by replacing all consecutive whitespace with a single space (or newline) character, except inside
`<pre>` tags.
Expand source code Browse git
def minify_html(html: str, _minify=partial( re.compile(r'(.*?)(<pre\b.*?</pre\b\s*>)|(.*)', re.IGNORECASE | re.DOTALL).sub, lambda m, _norm_space=partial(re.compile(r'\s\s+').sub, '\n'): ( _norm_space(m.group(1) or '') + (m.group(2) or '') + _norm_space(m.group(3) or '')))): """ Minify HTML by replacing all consecutive whitespace with a single space (or newline) character, except inside `<pre>` tags. """ return _minify(html)
def to_html(text: str, *, docformat: str = None, module: Module = None, link: Callable[..., str] = None, latex_math: bool = False)
-
Returns HTML of `text` interpreted as `docformat`.
`__docformat__` is respected if present, otherwise Numpydoc and Google-style docstrings are assumed, as well as pure Markdown.
`module` should be the documented module (so the references can be resolved) and `link`
is the hyperlinking function like the one in the example template.
Expand source code Browse git
def to_html(text: str, *, docformat: str = None, module: pdoc.Module = None, link: Callable[..., str] = None, latex_math: bool = False): """ Returns HTML of `text` interpreted as `docformat`. `__docformat__` is respected if present, otherwise Numpydoc and Google-style docstrings are assumed, as well as pure Markdown. `module` should be the documented module (so the references can be resolved) and `link` is the hyperlinking function like the one in the example template. """ # Optionally register our math syntax processor if not latex_math and _MathPattern.NAME in _md.inlinePatterns: _md.inlinePatterns.deregister(_MathPattern.NAME) elif latex_math and _MathPattern.NAME not in _md.inlinePatterns: _md.inlinePatterns.register(_MathPattern(_MathPattern.PATTERN), _MathPattern.NAME, _MathPattern.PRIORITY) md = to_markdown(text, docformat=docformat, module=module, link=link) return _md.reset().convert(md)
def to_markdown(text: str, *, docformat: str = None, module: Module = None, link: Callable[..., str] = None)
-
Returns `text`, assumed to be a docstring in `docformat`, converted to markdown.
`__docformat__` is respected if present, otherwise Numpydoc and Google-style docstrings are assumed, as well as pure Markdown.
`module` should be the documented module (so the references can be resolved) and `link`
is the hyperlinking function like the one in the example template.
Expand source code Browse git
def to_markdown(text: str, *, docformat: str = None, module: pdoc.Module = None, link: Callable[..., str] = None): """ Returns `text`, assumed to be a docstring in `docformat`, converted to markdown. `__docformat__` is respected if present, otherwise Numpydoc and Google-style docstrings are assumed, as well as pure Markdown. `module` should be the documented module (so the references can be resolved) and `link` is the hyperlinking function like the one in the example template. """ if not docformat: docformat = str(getattr(getattr(module, 'obj', None), '__docformat__', 'numpy,google ')) docformat, *_ = docformat.lower().split() if not (set(docformat.split(',')) & {'', 'numpy', 'google'}): warn(f'__docformat__ value {docformat!r} in module {module!r} not supported. ' 'Supported values are: numpy, google.') docformat = 'numpy,google' with _fenced_code_blocks_hidden(text) as result: text = result[0] text = _ToMarkdown.admonitions(text, module) if 'google' in docformat: text = _ToMarkdown.google(text) text = _ToMarkdown.doctests(text) text = _ToMarkdown.raw_urls(text) # If doing both, do numpy after google, otherwise google-style's # headings are incorrectly interpreted as numpy params if 'numpy' in docformat: text = _ToMarkdown.numpy(text) if module and link: # Hyperlink markdown code spans not within markdown hyperlinks. # E.g. `code` yes, but not [`code`](...). RE adapted from: # https://github.com/Python-Markdown/markdown/blob/ada40c66/markdown/inlinepatterns.py#L106 # Also avoid linking triple-backticked arg names in deflists. linkify = partial(_linkify, link=link, module=module, wrap_code=True) text = re.sub(r'(?P<inside_link>\[[^\]]*?)?' r'(?:(?<!\\)(?:\\{2})+(?=`)|(?<!\\)(?P<fence>`+)' r'(?P<code>.+?)(?<!`)' r'(?P=fence)(?!`))', lambda m: (m.group() if m.group('inside_link') or len(m.group('fence')) > 2 else linkify(m)), text) result[0] = text text = result[0] return text
Classes
class ReferenceWarning (*args, **kwargs)
-
This warning is raised in
`to_html()`
when an object reference in markdown doesn't match any documented objects. Look for this warning to catch typos / references to obsolete symbols.
Expand source code Browse git
class ReferenceWarning(UserWarning): """ This warning is raised in `to_html` when a object reference in markdown doesn't match any documented objects. Look for this warning to catch typos / references to obsolete symbols. """
Ancestors
- builtins.UserWarning
- builtins.Warning
- builtins.Exception
- builtins.BaseException