✨ A wild Perl script appears

2021-09-21 23:15:22 -05:00 · 2021-09-21 23:15:22 -05:00 · e3ddbae998
commit e3ddbae998
parent cb3781ec66
8 changed files with 119 additions and 120 deletions
--- a/README.md
+++ b/README.md
@ -29,15 +29,17 @@ cargo install mdbook
 # Clone this repository
 git clone https://github.com/hyperreal64/zsh-manual-mdbook

-# Install Python dependencies
+# Install Perl dependencies
+# Ensure cpanminus is installed from your distro's repos
+# e.g. sudo apt install cpanminus or sudo dnf install cpanminus
 cd zsh-manual-mdbook
-pip install -r requirements.txt
+sudo cpanm Archive::Extract File::Basename HTTP::Simple IPC::Cmd Mojo::DOM Mojo::UserAgent

 # Ensure DocToc is installed
 sudo npm -g install doctoc

 # Return to repository clone and run the make-mdbook script
-./make-mdbook.sh
+./make_mdbook.pl

 # Serve locally
 cd zsh_manual/
--- a/gen_summ_manual.py
+++ b/gen_summ_manual.py
@ -1,39 +0,0 @@
-import re
-import requests
-from bs4 import BeautifulSoup
-
-
-# Check if input contains integer substring
-def hasInteger(input):
-    return bool(re.search(r"(?<![\d.])[0-9]+(?![\d.])", input))
-
-# Generate SUMMARY.md from table of contents
-page = requests.get("http://zsh.sourceforge.net/Doc/Release/zsh_toc.html")
-soup = BeautifulSoup(page.content, "html.parser")
-results = soup.find_all("a")[5:329]
-contents = []
-for elem in results:
-    if elem not in contents:
-        contents.append(elem.get_text())
-
-output = []
-output.append("# Summary\n")
-
-for item in contents:
-    if hasInteger(item):
-        if "&" in item:
-            title = item.lstrip("0123456789. ")
-            fn0 = title.replace("&", "_0026")
-            fn1 = fn0.replace(" ", "-") + ".md"
-            new_line = "- [%s](./%s)" % (title, fn1)
-            output.append(new_line)
-        else:
-            title = item.lstrip("0123456789. ")
-            filename = title.replace(" ", "-") + ".md"
-            new_line = "- [%s](./%s)" % (title, filename)
-            output.append(new_line)
-    else:
-        continue
-
-for line in output:
-    print(line)
--- a/make_mdbook.pl
+++ b/make_mdbook.pl
@ -0,0 +1,111 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Archive::Extract;
+use Cwd;
+use File::Basename;
+use HTTP::Simple;
+use IPC::Cmd qw[can_run run_forked];
+use Mojo::DOM;
+use Mojo::UserAgent;
+
+# Check for dependencies, exit with error if not available
+can_run("pandoc") or die "pandoc command not found";
+can_run("doctoc") or die "doctoc command not found";
+
+# All scratch work happens below $HOME. Refuse to continue without it: an
+# undefined HOME would turn the tmp path into "/zsh_doc_tmp" and the recursive
+# delete below would then operate directly under the filesystem root.
+defined $ENV{'HOME'} && length $ENV{'HOME'}
+  or die "HOME environment variable is not set";
+
+# Source archive and the scratch directories used by the rest of the script:
+#   $zsh_html_src_dir - where the extracted HTML pages are read from
+#   $mdbook_src_dir   - where the converted Markdown is written
+my $zsh_html_url     = "http://zsh.sourceforge.net/Doc/zsh_html.tar.gz";
+my $zsh_doc_tmp_dir  = $ENV{'HOME'} . "/zsh_doc_tmp";
+my $zsh_html_src_dir = $zsh_doc_tmp_dir . "/zsh_html";
+my $mdbook_src_dir   = $zsh_doc_tmp_dir . "/mdbook_src";
+
+# Ensure fresh tmp directories exist.
+# List-form system() bypasses the shell, so a HOME containing spaces or shell
+# metacharacters reaches rm as one literal argument.
+system( "rm", "-rf", $zsh_doc_tmp_dir ) == 0
+  or die "rm -rf $zsh_doc_tmp_dir failed: $?";
+
+# mkdir reports its failure reason in $! (errno); $? only describes the last
+# child process and would be misleading here.
+foreach my $dir ( $zsh_doc_tmp_dir, $zsh_html_src_dir, $mdbook_src_dir ) {
+    mkdir($dir) or die "Cannot create directory $dir: $!";
+}
+
+# Download, extract zsh_html.tar.gz to zsh_doc_tmp_dir
+my $dest = "$zsh_doc_tmp_dir/zsh_html.tar.gz";
+
+# NOTE(review): HTTP::Simple's getstore appears to throw on failure by itself;
+# the "or die" is kept as a fallback for a false return. $? is not reported
+# because no child process is involved in the download.
+getstore( $zsh_html_url, $dest )
+  or die "Error in fetching or storing $zsh_html_url to $dest";
+
+# new() returns false when it cannot work out the archive type; check it so
+# the failure is reported here rather than as a method call on undef.
+my $ae = Archive::Extract->new( archive => $dest )
+  or die "Cannot create an extractor for $dest";
+$ae->extract( to => $zsh_doc_tmp_dir )
+  or die $ae->error;
+
+# Extra files are added to the archive that have no content besides aliases to
+# pages that do. They are all less than 4k, and they are just clutter for mdbook, so
+# we remove them here along with the Index pages.
+my @files = <$zsh_html_src_dir/*.html>;
+
+foreach my $file (@files) {
+
+    # File::Basename is already loaded; no need to spawn a shell just to strip
+    # the directory part (and no unquoted path on a command line).
+    my $bname = basename($file);
+
+    # A page is clutter when it is tiny, is an index page, or is one of the
+    # zsh_* pages.
+    my $is_clutter = ( -s $file ) < 4000
+      || $bname =~ /-Index/
+      || $bname =~ /^zsh_[\w]+/;
+    next unless $is_clutter;
+
+    # Each file is removed at most once; a failure is reported but not fatal.
+    unlink($file) or warn "Could not remove $file: $!\n";
+}
+
+# Remove HTML noise: drop every line that mentions "valign" or "table".
+# The filtering is done in-process instead of spawning two "perl -i" shells
+# per file, so file names never pass through a shell command line.
+@files = <$zsh_html_src_dir/*.html>;
+foreach my $file (@files) {
+    open( my $in_fh, "<", $file ) or die "Cannot read $file: $!";
+    my @kept_lines = grep { !/valign/ && !/table/ } <$in_fh>;
+    close($in_fh);
+
+    # Rewrite the same path in place with only the lines that survived.
+    open( my $out_fh, ">", $file ) or die "Cannot write $file: $!";
+    print {$out_fh} @kept_lines;
+    close($out_fh) or die "Cannot finish writing $file: $!";
+}
+
+# Convert html to md with pandoc: every remaining page becomes a GitHub
+# flavoured Markdown file of the same base name inside the mdbook source dir.
+for my $html_path (@files) {
+    my $stem    = basename( $html_path, ".html" );
+    my $md_path = "$mdbook_src_dir/$stem.md";
+
+    # List-form system(): pandoc receives each argument verbatim.
+    my @pandoc_cmd =
+      ( "pandoc", $html_path, qw(-f html -t gfm -o), $md_path );
+
+    my $wait_status = system(@pandoc_cmd);
+    die "system @pandoc_cmd failed: $?" unless $wait_status == 0;
+}
+
+# Rename Jobs-_0026-Signals.md to Jobs-&-Signals.md
+# rename() reports its failure reason in $! (errno), not in $?.
+rename(
+    "$mdbook_src_dir/Jobs-_0026-Signals.md",
+    "$mdbook_src_dir/Jobs-&-Signals.md"
+) or die "Cannot rename Jobs-_0026-Signals.md in $mdbook_src_dir: $!";
+
+# Generate TOC with doctoc. The run is not fatal: on a zero exit code the
+# captured stdout is echoed, on any other exit code the captured stderr is.
+my $result        = run_forked("doctoc $mdbook_src_dir/");
+my $output_stream = $result->{'exit_code'} == 0 ? 'stdout' : 'stderr';
+print $result->{$output_stream};
+
+# Generate SUMMARY.md from table of contents
+my $ua      = Mojo::UserAgent->new;
+my $toc_url = "https://zsh.sourceforge.io/Doc/Release/zsh_toc.html";
+
+# result() dies on connection errors, and the explicit status check stops a
+# failed fetch from silently producing a SUMMARY.md with no entries.
+my $toc_res = $ua->get($toc_url)->result;
+$toc_res->is_success
+  or die "Error fetching $toc_url: "
+  . $toc_res->code . " "
+  . $toc_res->message;
+my $dom = $toc_res->dom;
+
+# Lexical filehandle instead of the global bareword FH; open/close failures
+# are reported through $! (errno).
+my $summary_file = "$mdbook_src_dir/SUMMARY.md";
+open( my $summary_fh, ">", $summary_file )
+  or die "Cannot open $summary_file for writing: $!";
+print {$summary_fh} "# Summary\n\n";
+
+my @toc = $dom->find('a')->map('text')->each;
+foreach my $toc (@toc) {
+
+    # Keep only link texts that contain a standalone integer, i.e. one that is
+    # not adjacent to another digit or a dot (so "1.2"-style numbers do not
+    # qualify on their own).
+    next unless $toc =~ /(?<![\d.])[0-9]+(?![\d.])/;
+
+    # Title: the link text minus its first run of digits, trimmed.
+    my $title = $toc;
+    $title =~ s/\d+//;
+    $title =~ s/^\s+|\s+$//g;
+
+    # File name: the title with every whitespace run replaced by a dash.
+    ( my $file_stem = $title ) =~ s/\s+/-/g;
+    print {$summary_fh} "- [$title](./$file_stem.md)\n";
+}
+
+# Buffered write errors only surface at close, so check it.
+close($summary_fh) or die "Cannot close $summary_file: $!";
+
+# Copy md files to zsh manual mdbook src
+# First ensure it is empty
+my $cwd            = getcwd();
+my $manual_src_dir = "$cwd/zsh_manual/src";
+
+# List-form system() keeps the paths away from the shell, so a working
+# directory with spaces or metacharacters is handled as a single argument.
+system( "rm", "-rf", $manual_src_dir ) == 0
+  or die "rm -rf $manual_src_dir failed: $?";
+
+# mkdir reports its failure reason in $! (errno), not in $?.
+mkdir($manual_src_dir) or die "Cannot create directory $manual_src_dir: $!";
+
+# Expand the file list in Perl so generated names such as "Jobs-&-Signals.md"
+# are handed to cp verbatim.
+my @generated_files = <$mdbook_src_dir/*>;
+@generated_files or die "No generated files found in $mdbook_src_dir";
+system( "cp", @generated_files, "$manual_src_dir/" ) == 0
+  or die "cp to $manual_src_dir failed: $?";
+
+# Cleanup zsh_doc_tmp
+system( "rm", "-rf", $zsh_doc_tmp_dir ) == 0
+  or die "rm -rf $zsh_doc_tmp_dir failed: $?";
--- a/make_mdbook.sh
+++ b/make_mdbook.sh
@ -1,57 +0,0 @@
-#!/usr/bin/env bash
-
-set -euxo pipefail
-
-_zsh_html_url="http://zsh.sourceforge.net/Doc/zsh_html.tar.gz"
-_zsh_doc_tmp_dir="${HOME}/zsh_doc_tmp"
-_zsh_html_src_dir="${_zsh_doc_tmp_dir}/zsh_html"
-_zsh_md_src_dir="${_zsh_doc_tmp_dir}/zsh_md"
-_mdbook_src_dir="${_zsh_doc_tmp_dir}/mdbook_src"
-
-# Ensure zsh_doc_tmp directories exist
-rm -rf "${_zsh_doc_tmp_dir}"
-for dir in "${_zsh_html_src_dir}" "${_zsh_md_src_dir}" "${_mdbook_src_dir}"; do
-  if ! [ -d "$dir" ]; then
-    mkdir -p "$dir"
-  fi
-done
-
-# Download and extract zsh_html.tar.gz to zsh_doc_tmp_dir
-wget "${_zsh_html_url}" -O "${_zsh_doc_tmp_dir}/zsh_html.tar.gz"
-tar xzf "${_zsh_doc_tmp_dir}/zsh_html.tar.gz" -C "${_zsh_doc_tmp_dir}"
-
-# For some reason, extra files are added to the archive that
-# have no content besides aliases to pages that do.
-# They are all less than 4k, and they are just clutter for mdbook,
-# so we remove them here, along with the Index pages.
-find "${_zsh_html_src_dir}" -name "*.html" -type 'f' -size -4k -delete
-find "${_zsh_html_src_dir}" -name "*-Index.html" -type 'f' -delete
-find "${_zsh_html_src_dir}" -name "zsh_*.html" -type 'f' -delete
-
-# Remove html noise
-for file in "${_zsh_html_src_dir}"/*.html; do
-  sed -i '/table/d' "$file"
-  sed -i '/span/d' "$file"
-  sed -i '/valign/d' "$file"
-  sed -i '/\[\]{#/d' "$file"
-done
-
-# Convert html to md with pandoc
-for file in "${_zsh_html_src_dir}"/*.html; do
-  pandoc "$file" -f html -t gfm -o "${_mdbook_src_dir}/$(basename -- "$file" .html).md";
-done
-
-# Generate TOC with doctoc
-doctoc "${_mdbook_src_dir}"/*.md
-
-# Generate SUMMARY.md from zsh_toc.html
-python3 "${PWD}/gen_summ_manual.py" > "${_mdbook_src_dir}/SUMMARY.md"
-
-# Copy md files to mdbook src
-# First ensure the src directory is empty
-rm -rf "${PWD}/zsh_manual/src"
-mkdir "${PWD}/zsh_manual/src"
-cp -rf "${_mdbook_src_dir}"/* "${PWD}/zsh_manual/src/"
-
-# Cleanup zsh_doc_tmp
-rm -rf "${_zsh_doc_tmp_dir}"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,19 +0,0 @@
-appdirs==1.4.4
-beautifulsoup4==4.9.3
-black==21.5b0
-bs4==0.0.1
-certifi==2020.12.5
-chardet==4.0.0
-click==7.1.2
-flake8==3.9.2
-idna==2.10
-mccabe==0.6.1
-mypy-extensions==0.4.3
-pathspec==0.8.1
-pycodestyle==2.7.0
-pyflakes==2.3.1
-regex==2021.4.4
-requests==2.25.1
-soupsieve==2.2.1
-toml==0.10.2
-urllib3==1.26.4
--- a/zsh_manual/src/Jobs-_0026-Signals.md
+++ b/zsh_manual/src/Jobs-_0026-Signals.md
--- a/zsh_manual/src/Parameters.md
+++ b/zsh_manual/src/Parameters.md
@ -1546,7 +1546,8 @@ lists and for the line editor.

 `LISTMAX`

-In the line editor, the number of matches to list without asking most as
+In the line editor, the number of matches to list without asking first.
+If the value is negative, the list will be shown if it spans at most as
 many lines as given by the absolute value. If set to zero, the shell
 asks only if the top of the listing would scroll off the screen.

--- a/zsh_manual/src/SUMMARY.md
+++ b/zsh_manual/src/SUMMARY.md
@ -9,7 +9,7 @@
 - [Redirection](./Redirection.md)
 - [Command Execution](./Command-Execution.md)
 - [Functions](./Functions.md)
-- [Jobs & Signals](./Jobs-_0026-Signals.md)
+- [Jobs & Signals](./Jobs-&-Signals.md)
 - [Arithmetic Evaluation](./Arithmetic-Evaluation.md)
 - [Conditional Expressions](./Conditional-Expressions.md)
 - [Prompt Expansion](./Prompt-Expansion.md)