A wild Perl script appears

This commit is contained in:
Jeffrey Serio 2021-09-21 23:15:22 -05:00
parent cb3781ec66
commit e3ddbae998
8 changed files with 119 additions and 120 deletions

View File

@ -29,15 +29,17 @@ cargo install mdbook
# Clone this repository
git clone https://github.com/hyperreal64/zsh-manual-mdbook
# Install Python dependencies
# Install Perl dependencies
# Ensure cpanminus is installed from your distro's repos
# e.g. sudo apt install cpanminus or sudo dnf install cpanminus
cd zsh-manual-mdbook
pip install -r requirements.txt
sudo cpanm Archive::Extract File::Basename HTTP::Simple IPC::Cmd Mojo::DOM Mojo::UserAgent
# Ensure DocToc is installed
sudo npm -g install doctoc
# Return to repository clone and run the make-mdbook script
./make-mdbook.sh
./make_mdbook.pl
# Serve locally
cd zsh_manual/

View File

@ -1,39 +0,0 @@
import re
import requests
from bs4 import BeautifulSoup
# Check if input contains integer substring
def hasInteger(input):
    """Return True when ``input`` contains a standalone run of digits.

    A digit run only counts when it is neither preceded nor followed by
    another digit or a dot, so "12 Foo" matches while "1.2 Foo" does not.
    """
    return bool(re.search(r"(?<![\d.])[0-9]+(?![\d.])", input))
# Generate SUMMARY.md from table of contents
page = requests.get("http://zsh.sourceforge.net/Doc/Release/zsh_toc.html")
soup = BeautifulSoup(page.content, "html.parser")
# NOTE(review): the 5:329 slice presumably selects the manual's own TOC
# links on the page as it existed when this was written -- confirm.
results = soup.find_all("a")[5:329]

# Collect the link texts once each. The comparison must be made on the text
# that is stored in the list; comparing the tag object against a list of
# strings never matches, so repeats were never filtered out.
contents = []
for elem in results:
    text = elem.get_text()
    if text not in contents:
        contents.append(text)

# Only entries carrying a standalone integer (see hasInteger) become
# summary lines; everything else is skipped.
output = ["# Summary\n"]
for item in contents:
    if not hasInteger(item):
        continue
    # Drop the leading numbering ("12 ", "3. ", ...) to get the title.
    title = item.lstrip("0123456789. ")
    # File names spell "&" as "_0026" and use "-" in place of spaces.
    filename = title.replace("&", "_0026").replace(" ", "-") + ".md"
    output.append("- [%s](./%s)" % (title, filename))

for line in output:
    print(line)

111
make_mdbook.pl Executable file
View File

@ -0,0 +1,111 @@
#!/usr/bin/env perl
use strict;
use warnings;
use Archive::Extract;
use Cwd;
use File::Basename;
use HTTP::Simple;
use IPC::Cmd qw[can_run run_forked];
use Mojo::DOM;
use Mojo::UserAgent;
# Check for dependencies, exit with error if not available
for my $tool (qw(pandoc doctoc)) {
    can_run($tool) or die "$tool command not found";
}

# Upstream archive and the scratch tree (under $HOME) that the build uses
my $zsh_html_url     = "http://zsh.sourceforge.net/Doc/zsh_html.tar.gz";
my $zsh_doc_tmp_dir  = $ENV{'HOME'} . "/zsh_doc_tmp";
my $zsh_html_src_dir = $zsh_doc_tmp_dir . "/zsh_html";
my $mdbook_src_dir   = $zsh_doc_tmp_dir . "/mdbook_src";

# Ensure fresh tmp directories exist.
# List-form system() bypasses the shell, so a path containing spaces or
# shell metacharacters is passed to rm as a single argument.
system( "rm", "-rf", $zsh_doc_tmp_dir ) == 0
  or die "rm -rf $zsh_doc_tmp_dir failed: $?";

# mkdir reports its failure reason in $!, not $? (child exit status).
for my $dir ( $zsh_doc_tmp_dir, $zsh_html_src_dir, $mdbook_src_dir ) {
    mkdir($dir) or die "mkdir $dir failed: $!";
}
# Download, extract zsh_html.tar.gz to zsh_doc_tmp_dir
my $dest = "$zsh_doc_tmp_dir/zsh_html.tar.gz";

# NOTE(review): HTTP::Simple's getstore is documented to throw on
# connection, HTTP and filesystem errors, so the "or die" is only a safety
# net -- confirm against the installed version. $? (child exit status) is
# not set by an in-process fetch, so it is no longer part of the message.
my $status = getstore( $zsh_html_url, $dest )
  or die "Error in fetching or storing $zsh_html_url";

# Unpack into the scratch directory; Archive::Extract keeps the failure
# reason in ->error.
my $ae = Archive::Extract->new( archive => $dest );
my $ok = $ae->extract( to => $zsh_doc_tmp_dir )
  or die $ae->error;
# Extra files are added to the archive that have no content besides aliases to
# pages that do. They are all less than 4k, and they are just clutter for mdbook, so
# we remove them here along with the Index pages.
my @files = <$zsh_html_src_dir/*.html>;
foreach my $file (@files) {

    # File::Basename is already loaded; no need to fork `basename` through
    # the shell for every file (which also broke on unusual file names).
    my $bname = basename($file);

    # -s yields a false value for an empty file; treat that as size 0.
    my $is_small = ( ( -s $file ) || 0 ) < 4000;

    # Small alias pages, the *-Index pages and the zsh_* pages all go.
    if ( $is_small or $bname =~ /-Index/ or $bname =~ /^zsh_[\w]+/ ) {
        unlink $file or warn "could not remove $file: $!";
    }
}
# Remove HTML noise: drop every line that mentions "valign" or "table".
# Done in-process rather than by spawning two `perl -i` children per file
# through the shell, which also mis-handled file names needing quoting.
@files = <$zsh_html_src_dir/*.html>;
foreach my $file (@files) {
    open( my $in, "<", $file ) or die "open $file for reading: $!";
    my @kept = grep { !/valign/ && !/table/ } <$in>;
    close($in) or die "close $file: $!";

    open( my $out, ">", $file ) or die "open $file for writing: $!";
    print {$out} @kept or die "write $file: $!";

    # Buffered write errors only surface at close, so check it.
    close($out) or die "close $file: $!";
}
# Convert html to md with pandoc: each remaining HTML page becomes a
# GitHub-flavoured Markdown file of the same base name in the mdbook
# source directory.
for my $html_path (@files) {
    my $stem    = basename( $html_path, ".html" );
    my $md_path = "$mdbook_src_dir/$stem.md";
    my @argv    = ( "pandoc", "$html_path", qw(-f html -t gfm -o), $md_path );

    # List-form system(): pandoc is run directly, without a shell.
    my $exit = system(@argv);
    die "system @argv failed: $?" if $exit != 0;
}
# Rename Jobs-_0026-Signals.md to Jobs-&-Signals.md
# rename() reports its failure reason in $!, not in $? (child exit status).
my $jobs_old = "$mdbook_src_dir/Jobs-_0026-Signals.md";
my $jobs_new = "$mdbook_src_dir/Jobs-&-Signals.md";
rename( $jobs_old, $jobs_new )
  or die "rename $jobs_old -> $jobs_new failed: $!";
# Generate TOC with doctoc over the converted Markdown directory.
# The captured stdout is echoed when doctoc exits with 0, otherwise its
# stderr is echoed; the script carries on in both cases.
my $result = run_forked("doctoc $mdbook_src_dir/");
if ( $result->{'exit_code'} == 0 ) {
    print $result->{'stdout'};
}
else {
    print $result->{'stderr'};
}
# Generate SUMMARY.md from table of contents
my $ua = Mojo::UserAgent->new;

# ->result (unlike ->res) dies on a connection error, so a failed fetch can
# no longer silently produce an empty SUMMARY.md.
my $toc_res =
  $ua->get("https://zsh.sourceforge.io/Doc/Release/zsh_toc.html")->result;
$toc_res->is_success
  or die "Fetching zsh_toc.html failed: " . $toc_res->code . "\n";
my $dom = $toc_res->dom;

my $summary_file = "$mdbook_src_dir/SUMMARY.md";

# Lexical filehandle instead of the bareword FH; open() reports in $!.
open( my $summary_fh, ">", $summary_file )
  or die "open $summary_file: $!";
print {$summary_fh} "# Summary\n\n";

# One summary entry per link text that carries a standalone integer (a
# digit run not adjacent to another digit or a dot).
my @toc = $dom->find('a')->map('text')->each;
foreach my $toc (@toc) {
    next unless $toc =~ /(?<![\d.])[0-9]+(?![\d.])/;

    # Title: the link text minus its first digit run, trimmed.
    my $title = $toc;
    $title =~ s/\d+//;
    $title =~ s/^\s+|\s+$//g;

    # File name: the title with whitespace runs turned into dashes.
    ( my $slug = $title ) =~ s/\s+/-/g;

    print {$summary_fh} "- [$title](./$slug.md)\n";
}

# Buffered write errors only surface at close, so check it.
close($summary_fh) or die "close $summary_file: $!";
# Copy md files to zsh manual mdbook src
# First ensure it is empty
my $cwd          = getcwd();
my $book_src_dir = "$cwd/zsh_manual/src";

# List-form system() bypasses the shell, so the paths are passed verbatim.
system( "rm", "-rf", $book_src_dir ) == 0
  or die "rm -rf $book_src_dir failed: $?";

# mkdir reports its failure reason in $!, not $?.
mkdir($book_src_dir) or die "mkdir $book_src_dir failed: $!";

# Expand the file list here because no shell is involved to do it.
my @built = <$mdbook_src_dir/*>;
@built or die "nothing to copy from $mdbook_src_dir";
system( "cp", @built, "$book_src_dir/" ) == 0
  or die "cp to $book_src_dir failed: $?";

# Cleanup zsh_doc_tmp
system( "rm", "-rf", $zsh_doc_tmp_dir ) == 0
  or die "rm -rf $zsh_doc_tmp_dir failed: $?";

View File

@ -1,57 +0,0 @@
#!/usr/bin/env bash
set -euxo pipefail

# Upstream HTML archive and the scratch tree (under $HOME) used by the build
_zsh_html_url="http://zsh.sourceforge.net/Doc/zsh_html.tar.gz"
_zsh_doc_tmp_dir="$HOME/zsh_doc_tmp"
_zsh_html_src_dir="$_zsh_doc_tmp_dir/zsh_html"
_zsh_md_src_dir="$_zsh_doc_tmp_dir/zsh_md"
_mdbook_src_dir="$_zsh_doc_tmp_dir/mdbook_src"

# Wipe any previous scratch tree, then create each working directory
rm -rf "$_zsh_doc_tmp_dir"
for scratch_dir in "$_zsh_html_src_dir" "$_zsh_md_src_dir" "$_mdbook_src_dir"; do
    [ -d "$scratch_dir" ] || mkdir -p "$scratch_dir"
done
# Fetch zsh_html.tar.gz into the scratch directory and unpack it there
_zsh_html_archive="${_zsh_doc_tmp_dir}/zsh_html.tar.gz"
wget -O "$_zsh_html_archive" "${_zsh_html_url}"
tar -xzf "$_zsh_html_archive" -C "${_zsh_doc_tmp_dir}"

# The archive carries extra pages that only alias real ones. They are all
# under 4k and are clutter for mdbook, so delete them together with the
# *-Index pages and the zsh_* pages in a single pass.
find "${_zsh_html_src_dir}" -type f -name "*.html" \
    \( -size -4k -o -name "*-Index.html" -o -name "zsh_*.html" \) -delete
# Remove html noise: delete every line matching any of these patterns
for html_page in "${_zsh_html_src_dir}"/*.html; do
    sed -i \
        -e '/table/d' \
        -e '/span/d' \
        -e '/valign/d' \
        -e '/\[\]{#/d' \
        "$html_page"
done

# Convert html to md with pandoc, keeping each page's base name
for html_page in "${_zsh_html_src_dir}"/*.html; do
    page_name="${html_page##*/}"
    page_name="${page_name%.html}"
    pandoc "$html_page" -f html -t gfm -o "${_mdbook_src_dir}/${page_name}.md"
done
# Generate TOC with doctoc
doctoc "$_mdbook_src_dir"/*.md

# Generate SUMMARY.md from zsh_toc.html
python3 "$PWD/gen_summ_manual.py" > "$_mdbook_src_dir/SUMMARY.md"

# Replace the mdbook src directory with the freshly generated files
_book_src_dir="$PWD/zsh_manual/src"
rm -rf "$_book_src_dir"
mkdir "$_book_src_dir"
cp -rf "$_mdbook_src_dir"/* "$_book_src_dir/"

# Cleanup zsh_doc_tmp
rm -rf "$_zsh_doc_tmp_dir"

View File

@ -1,19 +0,0 @@
appdirs==1.4.4
beautifulsoup4==4.9.3
black==21.5b0
bs4==0.0.1
certifi==2020.12.5
chardet==4.0.0
click==7.1.2
flake8==3.9.2
idna==2.10
mccabe==0.6.1
mypy-extensions==0.4.3
pathspec==0.8.1
pycodestyle==2.7.0
pyflakes==2.3.1
regex==2021.4.4
requests==2.25.1
soupsieve==2.2.1
toml==0.10.2
urllib3==1.26.4

View File

@ -1546,7 +1546,8 @@ lists and for the line editor.
`LISTMAX`
In the line editor, the number of matches to list without asking most as
In the line editor, the number of matches to list without asking first.
If the value is negative, the list will be shown if it spans at most as
many lines as given by the absolute value. If set to zero, the shell
asks only if the top of the listing would scroll off the screen.

View File

@ -9,7 +9,7 @@
- [Redirection](./Redirection.md)
- [Command Execution](./Command-Execution.md)
- [Functions](./Functions.md)
- [Jobs & Signals](./Jobs-_0026-Signals.md)
- [Jobs & Signals](./Jobs-&-Signals.md)
- [Arithmetic Evaluation](./Arithmetic-Evaluation.md)
- [Conditional Expressions](./Conditional-Expressions.md)
- [Prompt Expansion](./Prompt-Expansion.md)