3520 lines
197 KiB
HTML
3520 lines
197 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="sidebar-visible no-js light">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Substitutions - Zsh User's Guide</title>
|
|
|
|
|
|
<!-- Custom HTML head -->
|
|
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
|
<meta name="description" content="">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff" />
|
|
|
|
<link rel="icon" href="favicon.svg">
|
|
<link rel="shortcut icon" href="favicon.png">
|
|
<link rel="stylesheet" href="css/variables.css">
|
|
<link rel="stylesheet" href="css/general.css">
|
|
<link rel="stylesheet" href="css/chrome.css">
|
|
<link rel="stylesheet" href="css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
|
|
<link rel="stylesheet" href="fonts/fonts.css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" href="highlight.css">
|
|
<link rel="stylesheet" href="tomorrow-night.css">
|
|
<link rel="stylesheet" href="ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
<link rel="stylesheet" href="./theme/catppuccin.css">
|
|
<link rel="stylesheet" href="./theme/catppuccin-highlight.css">
|
|
|
|
</head>
|
|
<body>
|
|
<!-- Provide site root to javascript -->
|
|
<script type="text/javascript">
|
|
// Site root relative to this page; empty here. Presumably read by the book's
// other scripts (not visible in this file) to build asset URLs.
var path_to_root = "";

// Theme to fall back on when no preference has been stored: "navy" if the
// browser reports a dark colour-scheme preference, otherwise "light".
var default_theme = (function () {
    if (window.matchMedia("(prefers-color-scheme: dark)").matches) {
        return "navy";
    }
    return "light";
})();
|
|
</script>
|
|
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script type="text/javascript">
|
|
// Some values ended up in localStorage wrapped in double quotes (e.g.
// '"light"'). Rewrite them in place without the quotes so that the scripts
// below read the bare value.
//
// The whole thing is best-effort: localStorage access can throw (storage
// disabled or blocked), in which case there is nothing to repair.
try {
    var theme = localStorage.getItem('mdbook-theme');
    var sidebar = localStorage.getItem('mdbook-sidebar');

    // getItem() returns null for a key that was never set. Guard each value
    // separately so a missing theme no longer throws and silently skips the
    // sidebar repair below.
    if (theme && theme.startsWith('"') && theme.endsWith('"')) {
        localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
    }

    if (sidebar && sidebar.startsWith('"') && sidebar.endsWith('"')) {
        localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
    }
} catch (e) { }
|
|
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script type="text/javascript">
|
|
// Pick the theme: the stored preference when it can be read, otherwise the
// default computed earlier on this page.
var theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) { }
if (theme == null) {
    // Covers both null (nothing stored) and undefined (read failed).
    theme = default_theme;
}

// Replace the static fallback classes from the markup ('no-js', 'light')
// with the resolved theme and the 'js' marker.
var html = document.querySelector('html');
html.classList.remove('no-js', 'light');
html.classList.add(theme, 'js');
|
|
</script>
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script type="text/javascript">
|
|
// Decide whether the sidebar starts visible or hidden and record the choice
// as a "sidebar-<state>" class on <html>. The global `sidebar` is read again
// further down the page to set the matching ARIA attributes.
var html = document.querySelector('html');
var sidebar = null;
if (document.body.clientWidth >= 1080) {
    // Wide viewport: honour the stored preference, defaulting to visible.
    // Starting from null (not 'hidden') matters: if localStorage throws, the
    // fallback below must still produce 'visible' rather than keeping a
    // pre-set 'hidden'.
    try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch (e) { }
    sidebar = sidebar || 'visible';
} else {
    // Narrow viewport: always start hidden, regardless of stored preference.
    sidebar = 'hidden';
}
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
|
|
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<div class="sidebar-scrollbox">
|
|
<ol class="chapter"><li class="chapter-item expanded affix "><a href="zshguide.html">A User's Guide to the Z-Shell</a></li><li class="chapter-item expanded "><a href="zshguide01.html"><strong aria-hidden="true">1.</strong> A short introduction</a></li><li class="chapter-item expanded "><a href="zshguide02.html"><strong aria-hidden="true">2.</strong> What to put in your startup files</a></li><li class="chapter-item expanded "><a href="zshguide03.html"><strong aria-hidden="true">3.</strong> Dealing with basic shell syntax</a></li><li class="chapter-item expanded "><a href="zshguide04.html"><strong aria-hidden="true">4.</strong> The Z-Shell Line Editor</a></li><li class="chapter-item expanded "><a href="zshguide05.html" class="active"><strong aria-hidden="true">5.</strong> Substitutions</a></li><li class="chapter-item expanded "><a href="zshguide06.html"><strong aria-hidden="true">6.</strong> Completion, old and new</a></li><li class="chapter-item expanded "><a href="zshguide07.html"><strong aria-hidden="true">7.</strong> Modules and other bits and pieces Not written</a></li></ol>
|
|
</div>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
|
|
</nav>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
<div id="menu-bar-hover-placeholder"></div>
|
|
<div id="menu-bar" class="menu-bar sticky bordered">
|
|
<div class="left-buttons">
|
|
<button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</button>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="latte">Latte</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="frappe">Frappé</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="macchiato">Macchiato</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="mocha">Mocha</button></li>
|
|
</ul>
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
</div>
|
|
|
|
<h1 class="menu-title">Zsh User's Guide</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
|
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
|
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script type="text/javascript">
|
|
// Reflect the sidebar state (global `sidebar`, set earlier on this page) in
// ARIA attributes: the toggle button advertises whether the sidebar is
// expanded, the sidebar is hidden from assistive technology when collapsed,
// and its links are taken out of the tab order while it is not visible.
(function (shown) {
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', shown);
    document.getElementById('sidebar').setAttribute('aria-hidden', !shown);

    var links = document.querySelectorAll('#sidebar a');
    var tabStop = shown ? 0 : -1;
    for (var i = 0; i < links.length; i++) {
        links[i].setAttribute('tabIndex', tabStop);
    }
})(sidebar === 'visible');
|
|
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
|
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
|
<p><strong>Table of Contents</strong> <em>generated with <a href="https://github.com/thlorenz/doctoc">DocToc</a></em></p>
|
|
<ul>
|
|
<li><a href="#chapter-5-substitutions">Chapter 5: Substitutions</a>
|
|
<ul>
|
|
<li><a href="#51-quoting">5.1: Quoting</a>
|
|
<ul>
|
|
<li><a href="#511-backslashes">5.1.1: Backslashes</a></li>
|
|
<li><a href="#512-single-quotes">5.1.2: Single quotes</a></li>
|
|
<li><a href="#513-posix-quotes">5.1.3: POSIX quotes</a></li>
|
|
<li><a href="#514-double-quotes">5.1.4: Double quotes</a></li>
|
|
<li><a href="#515-backquotes">5.1.5: Backquotes</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#52-modifiers-and-what-they-modify">5.2: Modifiers and what they modify</a></li>
|
|
<li><a href="#53-process-substitution">5.3: Process Substitution</a></li>
|
|
<li><a href="#54-parameter-substitution">5.4: Parameter substitution</a>
|
|
<ul>
|
|
<li><a href="#541-using-arrays">5.4.1: Using arrays</a></li>
|
|
<li><a href="#542-using-associative-arrays">5.4.2: Using associative arrays</a></li>
|
|
<li><a href="#543-substituted-substitutions-top--and-tailing-etc">5.4.3: Substituted substitutions, top- and tailing, etc.</a></li>
|
|
<li><a href="#544-flags-for-options-splitting-and-joining">5.4.4: Flags for options: splitting and joining</a></li>
|
|
<li><a href="#545-flags-for-options-glob_subst-and-rc_expand_param">5.4.5: Flags for options: <code>GLOB_SUBST</code> and <code>RC_EXPAND_PARAM</code></a></li>
|
|
<li><a href="#546-yet-more-parameter-flags">5.4.6: Yet more parameter flags</a></li>
|
|
<li><a href="#547-a-couple-of-parameter-substitution-tricks">5.4.7: A couple of parameter substitution tricks</a></li>
|
|
<li><a href="#548-nested-parameter-substitutions">5.4.8: Nested parameter substitutions</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#55-that-substitution-again">5.5: That substitution again</a></li>
|
|
<li><a href="#56-arithmetic-expansion">5.6: Arithmetic Expansion</a>
|
|
<ul>
|
|
<li><a href="#561-entering-and-outputting-bases">5.6.1: Entering and outputting bases</a></li>
|
|
<li><a href="#562-parameter-typing">5.6.2: Parameter typing</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#57-brace-expansion-and-arrays">5.7: Brace Expansion and Arrays</a></li>
|
|
<li><a href="#58-filename-expansion">5.8: Filename Expansion</a></li>
|
|
<li><a href="#59-filename-generation-and-pattern-matching">5.9: Filename Generation and Pattern Matching</a>
|
|
<ul>
|
|
<li><a href="#591-comparing-patterns-and-regular-expressions">5.9.1: Comparing patterns and regular expressions</a></li>
|
|
<li><a href="#592-standard-features">5.9.2: Standard features</a></li>
|
|
<li><a href="#593-extensions-usually-available">5.9.3: Extensions usually available</a></li>
|
|
<li><a href="#594-extensions-requiring-extended_glob">5.9.4: Extensions requiring <code>EXTENDED_GLOB</code></a></li>
|
|
<li><a href="#595-recursive-globbing">5.9.5: Recursive globbing</a></li>
|
|
<li><a href="#596-glob-qualifiers">5.9.6: Glob qualifiers</a></li>
|
|
<li><a href="#597-globbing-flags-alter-the-behaviour-of-matches">5.9.7: Globbing flags: alter the behaviour of matches</a></li>
|
|
<li><a href="#598-the-function-zmv">5.9.8: The function <code>zmv</code></a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
|
<p><span id="subst"></span><span id="l111"></span></p>
|
|
<h1 id="chapter-5-substitutions"><a class="header" href="#chapter-5-substitutions">Chapter 5: Substitutions</a></h1>
|
|
<p>This chapter will appeal above all to people who are excited by the fact
|
|
that</p>
|
|
<pre><code> print ${array[(r)${(l.${#${(O@)array//?/X}[1]}..?.)}]}
|
|
</code></pre>
|
|
<p>prints out the longest element of the array <code>$array</code>. For the
|
|
overwhelming majority that forms the rest of the population, however,
|
|
there should be plenty that is useful before we reach that stage.
|
|
Anyway, it should be immediately apparent why there is no obfuscated zsh
|
|
code competition.</p>
|
|
<p>For those who don't do a lot of function writing and spend most of the
|
|
time at the shell prompt, the most useful section of this chapter is
|
|
probably that on filename generation (i.e. globbing) at the end of the
|
|
chapter. This will teach you how to avoid wasting your time with <code>find</code>
|
|
and the like when you want to select files for a command.</p>
|
|
<p><span id="l112"></span></p>
|
|
<h2 id="51-quoting"><a class="header" href="#51-quoting">5.1: Quoting</a></h2>
|
|
<p>I've been using quotes of some sort throughout this guide, but I've
|
|
never gone into the detail. It's about time I did, since using quotes is
|
|
an important part of controlling the effects of the shell's various
|
|
substitutions. Here are the basic quoting types.</p>
|
|
<p><span id="l113"></span></p>
|
|
<h3 id="511-backslashes"><a class="header" href="#511-backslashes">5.1.1: Backslashes</a></h3>
|
|
<p>The main point to make about backslashes is that they are really
|
|
trivial. You can quote any character whatsoever from the shell with a
|
|
backslash, even if it didn't mean anything unquoted; so if the worst
|
|
comes to the worst, you can take any old string at all, whatever it has
|
|
in it --- random collections of quotes, backslashes, unprintable
|
|
characters --- quote every single character with a backslash, and the
|
|
shell will treat it as a plain string:</p>
|
|
<pre><code> print \T\h\i\s\ \i\s\ \*\p\o\i\n\t\l\e\s\s\*\ \
|
|
\-\ \b\u\t\ \v\a\l\i\d\!
|
|
</code></pre>
|
|
<p>Remember, too, that this means you need an extra layer of quotation to
|
|
pass a `<code>\n</code>', or whatever, down to <code>print</code>.</p>
|
|
<p>However, zsh has an easier way of making sure everything is quoted with
|
|
a backslash when that's needed. It's a special form of parameter
|
|
substitution, just one of many tricks you can do by supplying flags in
|
|
parentheses:</p>
|
|
<pre><code> % read string
|
|
This is a *string* with various `special' characters
|
|
% print -r -- ${(q)string}
|
|
This\ is\ a\ \*string\*\ with\ various\ \`special\'\ characters
|
|
</code></pre>
|
|
<p>The <code>read</code> builtin didn't do anything to what you typed, so <code>$string</code>
|
|
contains just those characters. The <code>-r</code> flag to print told it to print
|
|
out what came after it in raw fashion, and here's the special part:
|
|
<code>${(q)string}</code> tells the shell to output the parameter with backslashes
|
|
where needed to prevent special characters being interpreted. All
|
|
parameter flags are specific to zsh; no other shell has them.</p>
|
|
<p>The flag is not very useful there, because zsh usually (remember the
|
|
<code>GLOB_SUBST</code> option?) doesn't do anything special to characters from
|
|
substitutions anyway. Where it <em>is</em> extremely useful is if you are going
|
|
to re-evaluate the text in the substitution but still want it treated as
|
|
a plain string. So after the above,</p>
|
|
<pre><code> % eval print -r -- ${(q)string}
|
|
This is a *string* with various `special' characters
|
|
</code></pre>
|
|
<p>and you get back what you started with, because at the <code>eval</code> of the
|
|
command line the backslashes put in by the <code>(q)</code> flag meant that the
|
|
value was treated as a plain string.</p>
|
|
<p>You can strip off quotes in parameters, too; the flag <code>(Q)</code> does this.
|
|
It doesn't care whether backslashes or single or double quotes are used,
|
|
it treats them all the way the shell's parser would. You only need this
|
|
when the parameter has somehow acquired quotes in its value. One way
|
|
this can happen is if you try reading a file containing shell commands,
|
|
and for this there's another trick: the <code>(z)</code> flag splits a line into an
|
|
array in the same way as if the line had been read in and was, say,
|
|
being assigned to an array. Here's an example:</p>
|
|
<pre><code> % cat file
|
|
print 'a quoted string' and\ another\ argument
|
|
% read -r line <file
|
|
% for word in ${(z)line}; do
|
|
for> print -r "quoted: $word"
|
|
for> print -r "unquoted: ${(Q)word}"
|
|
for> done
|
|
quoted: print
|
|
unquoted: print
|
|
quoted: 'a quoted string'
|
|
unquoted: a quoted string
|
|
quoted: and\ another\ argument
|
|
unquoted: and another argument
|
|
</code></pre>
|
|
<p>You will notice that the <code>(z)</code> doesn't remove any of the quotes from the
|
|
words read in, but the <code>(Q)</code> flag does. Note the <code>-r</code> flags to both
|
|
<code>read</code> and <code>print</code>: the first prevents the backslashes being absorbed by
|
|
<code>read</code>, and the second prevents them being absorbed by <code>print</code>. I'm
|
|
afraid backslashes can be a bit of a pain in the neck.</p>
|
|
<p><span id="l114"></span></p>
|
|
<h3 id="512-single-quotes"><a class="header" href="#512-single-quotes">5.1.2: Single quotes</a></h3>
|
|
<p>The only thing you can't quote with single quotes is another single
|
|
quote. However, there's an option <code>RC_QUOTES</code>, where two single quotes
|
|
inside a single-quoted string are turned into one. Apparently `<code>RC</code>'
|
|
refers to the shell <code>rc</code> which appeared in plan9; it seems to be one of
|
|
those programmes that some people get fanatically worked up about while
|
|
the rest of us can't quite work out why. Zsh users may sympathise. (This
|
|
was corrected by Oliver Kiddle and Bart Schaefer after I guessed
|
|
incorrectly that <code>RC</code> stood for recursive, although you're welcome to
|
|
think of it that way anyway. It doesn't really work for
|
|
<code>RC_EXPAND_PARAM</code>, however, which is definitely from the <code>rc</code> shell, and
|
|
if you look at the source code you will find a variable called
|
|
`<code>plan9</code>' which is tested to see if that option is in effect.)</p>
|
|
<p>You might remember something like this from BASIC, although in that case
|
|
with double quotes --- in zsh, it works only with single quotes, for
|
|
some reason. So,</p>
|
|
<pre><code> print -r 'A ''quoted'' string'
|
|
</code></pre>
|
|
<p>would usually give you the output `<code>A quoted string</code>', but with the
|
|
option set it prints `<code>A 'quoted' string</code>'. The <code>-r</code> option to <code>print</code>
|
|
doesn't do anything here, it's just to show I'm not hiding anything.
|
|
This is usually a useful and harmless option to have set, since there's
|
|
no other good reason for having two quotes together within quotes.</p>
|
|
<p>The standard way of quoting single quotes is to end the quote, insert a
|
|
backslashed single quote, and restart quotes again:</p>
|
|
<pre><code> print -r 'A '\''quoted'\'' string'
|
|
</code></pre>
|
|
<p>which is unaffected by the option setting, since the quotes immediately
|
|
after the backslashes are always treated as an ordinary printable
|
|
character. What you <em>can't</em> ever do is use backslashes as a way of
|
|
quoting characters inside single quotes; they are just treated as
|
|
ordinary characters there.</p>
|
|
<p>You can make parameter flags produce strings quoted with single quotes
|
|
instead of backslashes by doubling the `<code>q</code>': `<code>${(qq)param}</code>' instead
|
|
of `<code>${(q)param}</code>'. The main use for this is that the result is shorter
|
|
if you know there are a lot of special characters in the string, and
|
|
it's also a bit easier to read for humans rather than machines, but
|
|
usually it gains nothing over the other form. It can tell whether you
|
|
have <code>RC_QUOTES</code> set and uses that to make the string even shorter, so
|
|
be careful if you might use the resulting string somewhere where the
|
|
option isn't set.</p>
|
|
<p><span id="l115"></span></p>
|
|
<h3 id="513-posix-quotes"><a class="header" href="#513-posix-quotes">5.1.3: POSIX quotes</a></h3>
|
|
<p>There's a relative of single quotes which uses the syntax <code>$'</code> to
|
|
introduce a quoted string and <code>'</code> to end it; I refer to them as `POSIX
|
|
quotes' because they appear in the POSIX standard and I don't know what
|
|
else to call them; `string quotes' is one possibility, but sounds a bit
|
|
vague (what else would you quote?). The difference from single quotes is
|
|
that they understand the same backslash sequences as the print builtin.
|
|
Hence you can have the convenience of using `<code>\n</code>' for newline, `<code>\e</code>'
|
|
for escape, `<code>\xFF</code>' for an arbitrary character in hexadecimal, and so
|
|
on, for any command:</p>
|
|
<pre><code> % cat <<<$'Line\tone\nLine\ttwo'
|
|
Line one
|
|
Line two
|
|
</code></pre>
|
|
<p>Remember the `here string' notation `<code><<<</code>', which supplies standard
|
|
input for the command. Hence the output shows exactly how the quoted
|
|
string is being interpreted. It is the same as</p>
|
|
<pre><code> % print 'Line\tone\nLine\ttwo'
|
|
Line one
|
|
Line two
|
|
</code></pre>
|
|
<p>but there the interpretation is done inside <code>print</code>, which isn't always
|
|
convenient. POSIX quotes are currently rather underused.</p>
|
|
<p>This is as good a point as any to mention that the shell is completely
|
|
`eight-bit clean', which means you can have any of the 256 possible
|
|
characters anywhere in your string. For example, <code>$'foo\000bar'</code> has an
|
|
embedded ASCII NUL in it (that's not a misprint --- officially, ASCII
|
|
non-printing characters have two- or three-letter abbreviations).
|
|
Usually this terminates a string, but the shell works around this when
|
|
you are using it internally; when you try and pass it as an argument to
|
|
an external programme, however, all bets are off. Almost certainly the
|
|
first NUL in that case will cause the programme to think the string is
|
|
finished, because no information about the length of arguments is passed
|
|
down and there's nothing the shell can do about it. Hence, for example:</p>
|
|
<pre><code> % echo $'foo\000bar'
|
|
foobar
|
|
% /bin/echo $'foo\000bar'
|
|
foo
|
|
</code></pre>
|
|
<p>The shell's <code>echo</code> knows about the shell's 8-bit conventions, and prints
|
|
out the NUL, which the terminal doesn't show, then the remainder of the
|
|
string. The external version of <code>echo</code> didn't know any better than to
|
|
stop when it reached the NUL.</p>
|
|
<p>There are actually uses for embedded NULs: some versions of <code>find</code> and
|
|
<code>xargs</code>, for example, will put or accept NULs instead of newlines
|
|
between their bits of input and output (as distinct from command line
|
|
arguments), which is much safer if there's a chance the input or output
|
|
can contain a live newline. Using <code>$'\000'</code> allows the shell to fit in
|
|
very comfortably with these. If you want to try this, the corresponding
|
|
options are <code>-print0</code> for <code>find</code> (print with a NUL terminator instead of
|
|
newline) and <code>-0</code> for <code>xargs</code> (read input assuming a NUL terminator).</p>
|
|
<p>In older versions of the shell, characters with the top bit set, such as
|
|
those from non-English character sets found in ISO 8859 fonts, could
|
|
cause problems, since the shell also uses such characters internally to
|
|
represent its own special characters, but recent versions of the shell
|
|
(from about 3.0) side-step this problem in the same way as for NULs. Any
|
|
remaining problems --- it's quite tricky to handle this completely
|
|
consistently --- are bugs and should be reported.</p>
|
|
<p>You can force parameters to be quoted with POSIX quotes by the somewhat
|
|
absurd expedient of making the <code>q</code> in the quote flag appear a total of
|
|
four times. I can't think why you would ever want to do that, except
|
|
that it will turn newlines into `<code>\n</code>' and hence the result will fit on
|
|
a single (maybe rather long) line. Plus you get the replacement of funny
|
|
characters with escape sequences.</p>
|
|
<p><span id="l116"></span></p>
|
|
<h3 id="514-double-quotes"><a class="header" href="#514-double-quotes">5.1.4: Double quotes</a></h3>
|
|
<p>Double quotes allow some, but not all, forms of substitution inside.
|
|
More specifically, they allow parameter expansion, command substitution
|
|
and arithmetic substitution, but not any of the others: process
|
|
substitution doesn't happen, braces and initial tildes and equals signs
|
|
are not expanded and patterns are not special. Here's a table; each
|
|
expression on the left is some command line argument, and the results
|
|
show what is substituted if it appears outside quotes, or in double
|
|
quotes.</p>
|
|
<pre><code> Expression Outside quotes In double quotes
|
|
------------------------------------------------
|
|
=(echo hi mum) /tmp/zshTiqpL =(echo hi mum)
|
|
$ZSH_VERSION 4.0.1 4.0.1
|
|
$(echo hi mum) hi mum hi mum
|
|
$((6**2 + 6)) 42 42
|
|
{a,b}cd acd bcd {a,b}cd
|
|
~/foo /home/pws/foo ~/foo
|
|
.zl* .zlogin .zlogout .zl*
|
|
</code></pre>
|
|
<p>That `<code>/tmp/zshTiqpL</code>' could be any temporary filename, and indeed
|
|
several of the other substitutions will be different in your case.</p>
|
|
<p>You might already have guessed that `<code>${(qqq)string}</code>' forces <code>$string</code>
|
|
to use double quotes to quote its special characters. As with the other
|
|
forms, this is all properly handled --- the shell knows just which
|
|
characters need quoting inside double quotes, and which don't.</p>
|
|
<p><strong>Word-splitting in double quotes</strong></p>
|
|
<p>Where the substitutions are allowed, the (almost) invariable side effect
|
|
of double quotes is that word-splitting is suppressed. You can see this
|
|
using `<code>print -l</code>', which prints one argument per line:</p>
|
|
<pre><code> % array=(one two)
|
|
% print -l $(echo foo bar) $array
|
|
foo
|
|
bar
|
|
one
|
|
two
|
|
% print -l "$(echo foo bar) $array"
|
|
foo bar one two
|
|
</code></pre>
|
|
<p>The reason this is `almost' invariable is that parameter substitution
|
|
allows you to specify that normal word-splitting will occur. There are
|
|
two ways of doing this; both use the symbol `<code>@</code>'. You probably
|
|
remember this from the parameter `<code>$@</code>' which has just that effect when
|
|
it appears in double quotes: the arguments to the script or function are
|
|
split into words like a normal array, except that empty arguments are
|
|
not removed. I covered this at some length in <a href="zshguide03.html#syntax">chapter
|
|
3</a>.</p>
|
|
<p>This is extended for other parameters in the following way:</p>
|
|
<pre><code> % array=(one two three)
|
|
% print -l "${array[@]}"
|
|
one
|
|
two
|
|
three
|
|
</code></pre>
|
|
<p>and more generally for all forms of substitution using another flag,
|
|
<code>(@)</code>:</p>
|
|
<pre><code> % print -l "${(@)array}"
|
|
one
|
|
two
|
|
three
|
|
</code></pre>
|
|
<p><strong>Digression on subscripts</strong></p>
|
|
<p>The version with flags is perhaps less clear than the other, but it can
|
|
appear in lots of different places. For example, here is how you pick a
|
|
slice of an array in zsh:</p>
|
|
<pre><code> % print -l ${array[2,-1]}
|
|
two
|
|
three
|
|
</code></pre>
|
|
<p>where negative numbers count from the end of the array. The numbers in
|
|
square brackets are referred to as subscripts. This can get the <code>(@)</code>
|
|
treatment, too:</p>
|
|
<pre><code> % print -l "${(@)array[2,-1]}"
|
|
two
|
|
three
|
|
</code></pre>
|
|
<p>Although it's probably not obvious, you can use the other notation in
|
|
this case:</p>
|
|
<pre><code> % print -l "${array[@][2,-1]}"
|
|
two
|
|
three
|
|
</code></pre>
|
|
<p>The shell will actually handle arbitrary numbers of subscripts in
|
|
parameter substitutions, not just one; each applies to the result of the
|
|
previous one:</p>
|
|
<pre><code> % print -l "${array[@][2,-1][1]}"
|
|
two
|
|
</code></pre>
|
|
<p>What you have to watch out for is that that last subscript selected a
|
|
single word. You can continue to apply subscripts, but they will apply
|
|
only on the <em>characters</em> in that word, not on array elements:</p>
|
|
<pre><code> % print -l "${array[@][2,-1][1][2,-1]}"
|
|
wo
|
|
</code></pre>
|
|
<p>We've now strayed severely off topic: the subscripts will of course work
|
|
quite independently from whether the word is being split or appears in
|
|
double quotes. Despite the joining of words that occurs in double
|
|
quotes, subscripts of arrays still select array elements. This is a
|
|
consequence of the order in which the rules of parameter expansion
|
|
apply. There is a long, involved section on this in the <code>zshexpn</code> manual
|
|
entry (look for the heading `Rules' there or in the `Parameter
|
|
Expansion' node of the corresponding Info or HTML file).</p>
|
|
<p><strong>Word-splitting of quoted command substitutions</strong></p>
|
|
<p>Zsh has the useful feature that you can force the shell to apply the
|
|
rules of parameter expansion to the result of a command substitution. To
|
|
see where that might be useful, consider the case of the special
|
|
`command substitution' (although it's handled entirely in the shell,
|
|
not by running an external command) which puts the contents of a file on
|
|
the command line:</p>
|
|
<pre><code> % args() { print $#; } # report number of arguments
|
|
% cat file
|
|
Words on line one
|
|
Words on line two
|
|
% args $(<file)
|
|
8
|
|
% args "$(<file)"
|
|
1
|
|
</code></pre>
|
|
<p>The unquoted substitution split the file into individual words; the
|
|
quoted substitution didn't split it at all. These are the standard shell
|
|
rules.</p>
|
|
<p>It's very common, however, that you want one line per argument, not
|
|
splitting on spaces within the line. This is where parameter expansion
|
|
can come in. There is a flag <code>(f)</code> which says `split the result of the
|
|
expansion, one word per line'. Here's how to use it in this case:</p>
|
|
<pre><code> % args "${(f)$(<file)}"
|
|
2
|
|
</code></pre>
|
|
<p>Where you would usually put the name of a parameter, you put the command
|
|
substitution instead, and the shell operates on the result of that (note
|
|
that it does not treat the result as the name of a parameter, but as a
|
|
value --- this is discussed in more detail below). The double quotes
|
|
were necessary because otherwise the file would already have been split
|
|
into individual words by the time the parameter substitution came to
|
|
look at the result. You can easily verify that the two arguments are the
|
|
individual lines of the file. I don't remember what the `<code>f</code>' stands
|
|
for, but we were already using up flag codes quite fast when it came
|
|
along; Bart Schaefer believes it stands for `fold', which might at
|
|
least help you remember it.</p>
|
|
<p><span id="l117"></span></p>
|
|
<h3 id="515-backquotes"><a class="header" href="#515-backquotes">5.1.5: Backquotes</a></h3>
|
|
<p>The main thing to say about backquotes is that you should use the other
|
|
form of command substitution instead. There are two good reasons.</p>
|
|
<p>First, the other form can be nested:</p>
|
|
<pre><code> % print $(print $(print a word))
|
|
a word
|
|
</code></pre>
|
|
<p>Obviously that's a silly example, but the main point is that the only
|
|
time parentheses should occur unquoted in the shell is in pairs (the
|
|
patterns in case statements are an exception, but pairs of parentheses
|
|
around patterns are valid, too, and I have used that form in this
|
|
guide). Thus you can be confident that any piece of well-formatted shell
|
|
code can appear inside the command substitution.</p>
|
|
<p>This is clearly not true with <code>`...`</code>, even though the basic effect
|
|
is the same. Any unquoted <code>`</code> which happens to appear in a chunk of
|
|
code within the backquotes will be treated as the end of the quotes.</p>
|
|
<p>The second reason, which is closely related, is that it can be quite
|
|
difficult to decide how many levels of quotes are required inside a
|
|
backquoted expression. Consider:</p>
|
|
<pre><code> % print "`echo \"hello\"`"
|
|
hello
|
|
% print "$(echo \"hello\")"
|
|
"hello"
|
|
</code></pre>
|
|
<p>It's hard to explain quite what the difference here is without waving my
|
|
hands, which prevents me from typing, but the essential point is really
|
|
the same one about nesting: you can't do it with backquotes, because the
|
|
start and end symbols are the same, but you can do it with parentheses.
|
|
So in the second case there is no doubt that the embedded command line,
|
|
`<code>echo \"hello\"</code>', is to be treated exactly as if that had appeared
|
|
outside the command substitution; whereas in the first case, the quotes
|
|
within quotes had to be, um, quoted.</p>
|
|
<p>As a consequence, in</p>
|
|
<pre><code> % print "$(echo "hello")"
|
|
hello
|
|
</code></pre>
|
|
<p>you need to be careful: at first glance, the pairs of double quotes
|
|
surround `<code>$(echo </code>' and `<code>)</code>', but they don't, they are nested by
|
|
virtue of the substitution. You see the same thing with parameter
|
|
substitution:</p>
|
|
<pre><code> % unset foo
|
|
% print "${foo:-"a string"}"
|
|
a string
|
|
</code></pre>
|
|
<p>A third, less good, reason for using the form with parentheses is that
|
|
your more sophisticated friends will laugh at you otherwise. Peer
|
|
pressure is so important in this complex world.</p>
|
|
<p>That's all I have to say about command substitution, since I already
|
|
said a lot about it when I discussed the basic syntax in <a href="zshguide03.html#syntax">chapter
|
|
3</a>.</p>
|
|
<p><span id="l118"></span></p>
|
|
<h2 id="52-modifiers-and-what-they-modify"><a class="header" href="#52-modifiers-and-what-they-modify">5.2: Modifiers and what they modify</a></h2>
|
|
<p>Modifiers were introduced in <a href="zshguide02.html#init">chapter 2</a> when I
|
|
talked about `bang history', since that's where they came from. In zsh,
|
|
however, they can be used in a couple of other places. They have the
|
|
same form in each case: a colon, followed by a letter which is the code
|
|
for what the modifier does, possibly (in the case of substitutions)
|
|
followed by some other string. So, to jog your memory, unless you have
|
|
<code>NO_BANG_HIST</code> set:</p>
|
|
<pre><code> % print ~/file
|
|
/home/pws/file
|
|
% print !-1:t
|
|
file
|
|
</code></pre>
|
|
<p>where `<code>:t</code>' takes the tail (non-directory part) of the filename.</p>
|
|
<p>The second use is in parameters. This follows on very naturally. Note
|
|
that neither this nor any of the later uses of modifiers rely on the
|
|
<code>NO_BANG_HIST</code> option; that's purely for history.</p>
|
|
<pre><code> % param=~/file
|
|
% print ${param:t}
|
|
file
|
|
</code></pre>
|
|
<p>Normally you can miss out the braces in the parameter substitution, but
|
|
I tend to use them with modifiers for the sake of clarity. The fact that
|
|
the same parts of the shell are used for modifiers wherever they come
|
|
from has certain consequences:</p>
|
|
<pre><code> % print foo
|
|
foo
|
|
% ^foo^bar
|
|
bar
|
|
% param='this sentence contains a foo.'
|
|
% print ${param:&}
|
|
this sentence contains a bar.
|
|
</code></pre>
|
|
<p>The ampersand repeats the last substitution, which is the same for
|
|
parameter modifiers as for history modifiers. I find parameter modifiers
|
|
even more useful than history ones; extracting the head or tail of a
|
|
path is a very common operation on parameters.</p>
|
|
<p>Modifiers are also smart enough to handle arrays in a useful fashion.
|
|
Note this is not true of sets of arguments in history expansions;
|
|
`<code>:t</code>' will only extract one tail in that case, which may not be quite
|
|
what you're expecting:</p>
|
|
<pre><code> % print a sentence with a /real/live/bogus/path in it.
|
|
% print !!:t
|
|
path in it.
|
|
</code></pre>
|
|
<p>However, arrays <em>are</em> handled the way you might hope:</p>
|
|
<pre><code> % array=(~/.zshenv ~/.zshrc ~/.zlogout)
|
|
% print ${array:t}
|
|
.zshenv .zshrc .zlogout
|
|
</code></pre>
|
|
<p>The same logic is applied with substitutions. This means that the first
|
|
match in every element of the array is replaced:</p>
|
|
<pre><code> % array=('a bar of chocolate' 'a bar of barflies'
|
|
array> 'a barrier of barns')
|
|
% print ${array:s/bar/car/}
|
|
a car of chocolate a car of barflies a carrier of barns
|
|
</code></pre>
|
|
<p>unless, of course, you do a global replacement:</p>
|
|
<pre><code> % print ${array:gs/bar/car/}
|
|
a car of chocolate a car of carflies a carrier of carns
|
|
</code></pre>
|
|
<p>Note, however, that parameter substitution has its own <em>much</em> more
|
|
powerful equivalent, which does pattern matching, partial replacement of
|
|
modified parts of the original string, and so on. We'll come to this all
|
|
in good time.</p>
|
|
<p>The final use of modifiers is in filename generation, i.e. globbing.
|
|
Since this usually works by having special characters on the command
|
|
line, and modifiers just consist of ordinary characters, the syntax is a
|
|
little different:</p>
|
|
<pre><code> % print *.c
|
|
parser.c lexer.c input.c output.c
|
|
% print *.c(:r)
|
|
parser lexer input output
|
|
</code></pre>
|
|
<p>so you need parentheses around them. This is a special case of `glob
|
|
qualifiers' which you'll meet below; you can mix them, but the modifiers
|
|
must appear at the end. For example,</p>
|
|
<pre><code> % print -l ~/stuff/*
|
|
/home/pws/stuff/onefile.c
|
|
/home/pws/stuff/twofile.c
|
|
/home/pws/stuff/subdir
|
|
% print ~/stuff/*(.:r:t)
|
|
onefile twofile
|
|
</code></pre>
|
|
<p>The globbing qualifier `<code>.</code>' specifies that files must be regular, i.e.
|
|
not directories nor some form of special file. The `<code>:r</code>' removes the
|
|
suffix from the result, and the `<code>:t</code>' takes away the directory part.
|
|
Consequently, filename modifiers will be turned off if you set the
|
|
option <code>NO_BARE_GLOB_QUAL</code>.</p>
|
|
<p>Two final points to note about modifiers with filenames. First, it is
|
|
the only form of globbing where the result is no longer a filename; it
|
|
is always performed right at the end, after all normal filename
|
|
generation. Presumably, in the examples above, the word which was
|
|
inserted into the command line doesn't actually correspond to a real
|
|
file any more.</p>
|
|
<p>Second, although it <em>does</em> work if the word on the command line isn't a
|
|
pattern but an ordinary word with a modifier tacked on, it <em>doesn't</em>
|
|
work if that pattern, before modification, doesn't correspond to a real
|
|
file. So `<code>foo.c(:r)</code>' will only strip off the suffix if <code>foo.c</code> is
|
|
there in the current directory. This is perfectly logical given that the
|
|
attempt to match a file kicks the globbing system, including modifiers,
|
|
into action. If this is a problem for you, there are ways round; for
|
|
example, insert the right value by hand in a simple case like this, or
|
|
more realistically store the value in a parameter and apply the modifier
|
|
to that.</p>
|
|
<p><span id="l119"></span></p>
|
|
<h2 id="53-process-substitution"><a class="header" href="#53-process-substitution">5.3: Process Substitution</a></h2>
|
|
<p>I don't have much new to say on process substitution, but I do have an
|
|
example of where I find it useful. If you use the pager `less' you may
|
|
know it has the facility to preprocess the files you look at, for
|
|
example uncompressing files temporarily via the environment variable
|
|
<code>$LESSOPEN</code> (and maybe <code>$LESSCLOSE</code>). Zsh can very easily and, to my
|
|
thoroughly unbiased way of looking, more conveniently do the same thing.
|
|
Here's a subset of my zsh function front-end to less --- or indeed any
|
|
pager, which is given here by the standard environment variable <code>$PAGER</code>
|
|
with the default <code>less</code>. You can hard-wire any file-displaying command
|
|
at that point if you prefer.</p>
|
|
<pre><code> integer i=1
|
|
local args arg
|
|
args=($*)
|
|
|
|
for arg in $*; do
|
|
case $arg in
|
|
(*.bz2) args[$i]="=(bunzip2 -c ${(q)arg})"
|
|
;;
|
|
# this assumes your zcat is the one installed with gzip:
|
|
(*.(gz|Z)) args[$i]="=(zcat ${(q)arg})"
|
|
;;
|
|
(*) args[$i]=${(q)arg}
|
|
;;
|
|
esac
|
|
(( i++ ))
|
|
done
|
|
|
|
eval command ${PAGER:-less} $args
|
|
</code></pre>
|
|
<p>The main point of interest is how elements of the array <code>$args</code> were
|
|
replaced. The reason each argument was given an extra layer of quotes
|
|
via <code>(q)</code> is the <code>eval</code> at the end; <code>$args</code> is turned into an array of
|
|
literal characters first, which hence need quoting to protect special
|
|
characters. Without that, filenames with spaces or asterisks or whatever
|
|
wouldn't be shown properly.</p>
|
|
<p>The reason the <code>eval</code> is there is so that the process substitutions are
|
|
evaluated on the command line when the pager is run, and not before.
|
|
They are assigned back to elements of <code>$args</code> in quotes, so don't get
|
|
evaluated at that point. The effect will be to turn:</p>
|
|
<pre><code> less file.gz file.txt
|
|
</code></pre>
|
|
<p>into</p>
|
|
<pre><code> less =(zcat file.gz) file.txt
|
|
</code></pre>
|
|
<p>The `<code>command</code>' at the end of the function is there just in case the
|
|
function has the same name as the pager (i.e. `less' in this example);
|
|
it forces the external command to be called rather than the function.
|
|
The process substitution is ideal in this context; it provides <code>less</code>
|
|
with the name of a file to which the decompressed contents of <code>file.gz</code>
|
|
have been sent, and it deletes the file after the command exits.
|
|
Furthermore, the substitution happens in such a way that you can still
|
|
specify multiple files on the command line as you usually can with less.
|
|
The only problem is that the filename that appears in the `<code>less</code>'
|
|
prompt is meaningless.</p>
|
|
<p>In case you haven't come across it, <code>bzip2</code> is a programme very similar
|
|
to <code>gzip</code>, and it is used almost identically, but it provides better
|
|
compression.</p>
|
|
<p>There's an infelicity in output process substitutions, just as there is
|
|
with multios.</p>
|
|
<pre><code> echo hello > >(sed s/hello/goodbye/)
|
|
</code></pre>
|
|
<p>The shell spawns the <code>sed</code> process to handle the output from the command
|
|
line --- and then forgets about it. It does not wait for it (at least,
|
|
not until after it exits, when it will use the <code>wait</code> system call to
|
|
tidy up). So it is dangerous to rely on the result of the process being
|
|
available in the next command. If you try it interactively, in fact, you
|
|
may well find that the next prompt is printed before the output from
|
|
<code>sed</code> shows up on the terminal. This can probably be considered a bug,
|
|
but it is quite difficult to fix.</p>
|
|
<p><span id="l120"></span></p>
|
|
<h2 id="54-parameter-substitution"><a class="header" href="#54-parameter-substitution">5.4: Parameter substitution</a></h2>
|
|
<p>You can probably see from the above that parameter substitutions are at
|
|
the heart of much of the power available to transform zsh command lines.
|
|
What's more, we haven't covered even a significant fraction of what's on
|
|
offer.</p>
|
|
<p><span id="l121"></span></p>
|
|
<h3 id="541-using-arrays"><a class="header" href="#541-using-arrays">5.4.1: Using arrays</a></h3>
|
|
<p>The array syntax in zsh is quite powerful (surprised?); just don't
|
|
expect it to be as efficient as, say, perl. Like other features of zsh,
|
|
it exists to make users' lives easier, not to make your computer run
|
|
blindingly fast.</p>
|
|
<p>I've covered, somewhat sporadically, how to set arrays, and how to
|
|
extract bits of them --- the following illustrates this:</p>
|
|
<pre><code> % array=(one two three four)
|
|
% print ${array}
|
|
one two three four
|
|
% print ${array[3]}
|
|
three
|
|
% print ${array[2,-1]}
|
|
two three four
|
|
</code></pre>
|
|
<p>Remember you need `<code>typeset</code>' or equivalent if you want the array to be
|
|
local to a function. The neat way is `<code>typeset -a</code>', which creates an
|
|
empty array, but as long as you assign to the array before trying to use
|
|
it any old <code>typeset</code> will do.</p>
|
|
<p>You can use the array index and array slice notations for assigning to
|
|
arrays, in other words on the left-hand side of an `<code>=</code>':</p>
|
|
<pre><code> % array=(what kind of fool am i)
|
|
% array[2]=species
|
|
% print $array
|
|
what species of fool am i
|
|
% array[2]=(a piece)
|
|
% print $array
|
|
what a piece of fool am i
|
|
% array[-3,-1]=(work is a man)
|
|
% print $array
|
|
what a piece of work is a man
|
|
</code></pre>
|
|
<p>So you can replace a single element of an array by a single element, or
|
|
by an array slice; likewise you can replace a slice in one go by a slice
|
|
of a different length --- only the bits you explicitly tell it to
|
|
replace are changed, the rest is left intact and maybe shifted along to
|
|
make way. This is similar to perl's `splice' command, only for once
|
|
maybe a bit more memorable. Note that you shouldn't supply any braces on
|
|
the left hand side. The appearance of the expression in an assignment is
|
|
enough to trigger the special behaviour of subscripts, even if
|
|
<code>KSH_ARRAYS</code> is in effect --- though you need to subtract one from your
|
|
subscripts in that case.</p>
|
|
<p>You can remove bits in the middle, too, but note you should use an empty
|
|
array:</p>
|
|
<pre><code> % array=(one two three four)
|
|
% print $#array
|
|
4
|
|
% array[2]=
|
|
% print $#array
|
|
4
|
|
% array[2]=()
|
|
% print $#array
|
|
3
|
|
</code></pre>
|
|
<p>The first assignment set element 2 to the empty string, it didn't remove
|
|
it. The second replaced the array element with an array of length zero,
|
|
which did remove it.</p>
|
|
<p>Just as parameter substitutions have flags for special purposes, so do
|
|
subscripts. You can force them to search through arrays, matching on the
|
|
values. You can return the value matched ((r)everse subscripting):</p>
|
|
<pre><code> % array=(se vuol ballare signor contino)
|
|
% print ${array[(r)s*]}
|
|
se
|
|
% print ${array[(R)s*]}
|
|
signor
|
|
</code></pre>
|
|
<p>The <code>(r)</code> flag takes a pattern and substitutes the first element of the
|
|
array matched, while the <code>(R)</code> flag does the same but starting from the
|
|
end of the array. If nothing matched, you get the empty string; as usual
|
|
with parameters, this will be omitted if it's the only thing in an
|
|
unquoted argument. Using our <code>args</code> function to count the arguments
|
|
passed to a command again:</p>
|
|
<pre><code> % array=(some words)
|
|
% args() { print $#; }
|
|
% args ${array[(r)s*]}
|
|
1
|
|
% args ${array[(r)X*]}
|
|
0
|
|
% args "${array[(r)X*]}"
|
|
1
|
|
</code></pre>
|
|
<p>where in the last case the empty string was quoted, and passed down as a
|
|
single, empty argument.</p>
|
|
<p>You can also return the index matched; <code>(i)</code> to start matching from the
|
|
beginning, and <code>(I)</code> to start from the end.</p>
|
|
<pre><code> % array=(se vuol venire nella mia scuola)
|
|
% print ${array[(i)v*]}
|
|
2
|
|
% print ${array[(I)v*]}
|
|
3
|
|
</code></pre>
|
|
<p>matching `vuol' the first time and `venire' the second. What happens
|
|
if they don't match may be a little unexpected, but is reasonably
|
|
logical: you get the next index along. In other words, failing to match
|
|
at the end gives you the length of the array plus one, and failing to
|
|
match at the beginning gives you zero, so:</p>
|
|
<pre><code> array=(three egregious words)
|
|
for pat in '*e*e*' '*a*a*'; do
|
|
if [[ ${array[(i)$pat]} -le ${#array} ]]; then
|
|
print "Pattern $pat matched in array: ${array[(r)$pat]}."
|
|
else
|
|
print "Pattern $pat failed to match in array"
|
|
fi
|
|
done
|
|
</code></pre>
|
|
<p>prints:</p>
|
|
<pre><code> Pattern *e*e* matched in array: three.
|
|
Pattern *a*a* failed to match in array
|
|
</code></pre>
|
|
<p>If you adapt that chunk of code, you'll see you get the indices 1 and 4
|
|
returned. Note that the characters in <code>$pat</code> were treated as a pattern
|
|
even though putting <code>$pat</code> on the command line would normally just
|
|
produce the characters themselves. Subscripts are special in that way;
|
|
trying to keep the syntax under control at this point is a little hairy.
|
|
There is a more detailed description of this in the manual in the
|
|
section `Subscript Parsing' of the <code>zshparam</code> manual page or the
|
|
`Array Parameters' info node; to quote the characters in <code>pat</code>, you
|
|
would actually have to supply the command line strings <code>'\*e\*e\*'</code> and
|
|
<code>'\*a\*a\*'</code>. Just go round mumbling `extra layer of pattern expansion'
|
|
and everyone will think you know what you're talking about (it works for
|
|
me, fitfully).</p>
|
|
<p>There is currently no way of extracting a complete set of matches from
|
|
an ordinary array with subscript flags. We'll see other ways of doing
|
|
that below, however.</p>
|
|
<p><span id="l122"></span></p>
|
|
<h3 id="542-using-associative-arrays"><a class="header" href="#542-using-associative-arrays">5.4.2: Using associative arrays</a></h3>
|
|
<p>Look back at <a href="zshguide03.html#syntax">chapter 3</a> if you've forgotten
|
|
about associative arrays. These take subscripts, like ordinary arrays
|
|
do, but here the subscripts are arbitrary strings (or keys) associated
|
|
with the value stored in the element of the array. Remember, you need to
|
|
use `<code>typeset -A</code>' to create one, or one of <code>typeset</code>'s relatives with
|
|
the same option. This means that if you created it inside a function it
|
|
will be limited to the local scope, so if you want to create a global
|
|
associative array you will need to give the <code>-g</code> flag as well. This is
|
|
particularly common with associative arrays, which are often used to
|
|
store global information such as configuration details.</p>
|
|
<p>Retrieving information from associative arrays can get you into some of
|
|
the problems already hinted at in the use of subscript flags with
|
|
arrays. However, since normal subscripting doesn't make patterns active,
|
|
there is a way round here: make the subscript into another parameter:</p>
|
|
<pre><code> % typeset -A assoc
|
|
% assoc=(key value Schlüssel Wert clavis valor)
|
|
% subscript='key'
|
|
% print ${assoc[$subscript]}
|
|
value
|
|
</code></pre>
|
|
<p>I used fairly boring keys here, but they can be any string of
|
|
characters:</p>
|
|
<pre><code> % assoc=(']' right\ square\ bracket '*' asterisk '@' at\ sign)
|
|
% subscript=']'
|
|
% print ${assoc[$subscript]}
|
|
right square bracket
|
|
</code></pre>
|
|
<p>and <em>that</em> is harder to get the other way. Nonetheless, if you define
|
|
your own keys you will often use simple words, and in that case they can
|
|
happily appear directly in the square brackets.</p>
|
|
<p>I introduced two parameter flags, <code>(k)</code> and <code>(v)</code> in <a href="zshguide03.html#syntax">chapter
|
|
3</a>:</p>
|
|
<pre><code> % print ${(k)assoc}
|
|
* ] @
|
|
</code></pre>
|
|
<p>prints out keys, while</p>
|
|
<pre><code> % print ${(kv)assoc}
|
|
* asterisk ] right square bracket @ at sign
|
|
</code></pre>
|
|
<p>and the remaining two possibilities do the same thing:</p>
|
|
<pre><code> % print ${(v)assoc}
|
|
asterisk right square bracket at sign
|
|
% print ${assoc}
|
|
asterisk right square bracket at sign
|
|
</code></pre>
|
|
<p>You now know these are part of a much larger family of tricks to apply
|
|
to substitutions. There's nothing to stop you combining flags:</p>
|
|
<pre><code> % print -r ${(qkv)assoc}
|
|
\* asterisk \] right\ square\ bracket @ at\ sign
|
|
</code></pre>
|
|
<p>which helps see the wordbreaks. Don't forget the `<code>print -l</code>' trick for
|
|
separating out different words, and hence elements of arrays and
|
|
associative arrays:</p>
|
|
<pre><code> % print -l ${(kv)assoc}
|
|
*
|
|
asterisk
|
|
]
|
|
right square bracket
|
|
@
|
|
at sign
|
|
</code></pre>
|
|
<p>which is quite a lot clearer. As always, this will fail if you engage in
|
|
un-zsh activities with <code>SH_WORD_SPLIT</code>, but judicious use of <code>@</code>,
|
|
whether as a flag or a subscript, and double quotes, will always work:</p>
|
|
<pre><code> % print -l "${(@kv)assoc}"
|
|
*
|
|
asterisk
|
|
]
|
|
right square bracket
|
|
@
|
|
at sign
|
|
</code></pre>
|
|
<p>regardless of the option setting.</p>
|
|
<p>Apart from the subscripts, the second major difference between
|
|
associative and ordinary arrays is that the former don't have any order
|
|
defined. This will be entirely familiar if you have used Perl; the
|
|
principle here is identical. However, zsh has no notion at all, even as
|
|
a convenience, of slices of associative arrays. You can assign
|
|
individual elements or whole associative arrays --- remembering that in
|
|
the second case the right hand side must consist of key/value pairs ---
|
|
but you can't assign subgroups. Any attempt to use the slice notation
|
|
with commas will be met by a stern error message.</p>
|
|
<p>What zsh does have, however, is extra subscript flags for you to match
|
|
and retrieve one or more elements. If instead of an ordinary subscript
|
|
you use a subscript preceded by the flag <code>(i)</code>, the shell will search
|
|
for a matching key (not value) with the pattern given and return that.
|
|
This is deliberately the same as searching an ordinary array to get its
|
|
key (which in that case is just a number, the index), but note this time
|
|
it doesn't match on the value, it really does match, as well as return,
|
|
the key:</p>
|
|
<pre><code> % typeset -A assoc
|
|
% assoc=(fred third\ man finnbar slip roger gully trevor long\ off)
|
|
% print ${assoc[(i)f*]}
|
|
fred
|
|
</code></pre>
|
|
<p>You can still use the parameter flags <code>(k)</code> and <code>(v)</code> to tell the shell
|
|
which part of the key and/or value to return:</p>
|
|
<pre><code> % print ${(kv)assoc[(i)f*]}
|
|
fred third man
|
|
</code></pre>
|
|
<p>Note the division of labour. The subscript flag tells the shell what to
|
|
match against, while the parameter flags tell it which bit of the
|
|
matched element(s) you actually want to see.</p>
|
|
<p>Because of the essentially random ordering of associative arrays, you
|
|
couldn't tell here whether fred or finnbar would be chosen. However, you
|
|
can use the capital form <code>(I)</code> to tell the shell to retrieve all
|
|
matches. This time, let's see the values of the elements for which the
|
|
keys were matched:</p>
|
|
<pre><code> % print -l ${(v)assoc[(I)f*]}
|
|
third man
|
|
slip
|
|
</code></pre>
|
|
<p>and here we also got the position occupied by <code>finnbar</code>. The same rules
|
|
about patterns apply as with <code>(r)</code> in ordinary arrays --- a subscript is
|
|
treated as a pattern even if it came from a parameter substitution
|
|
itself.</p>
|
|
<p>You probably aren't surprised to hear that the subscript flags <code>(r)</code> and
|
|
<code>(R)</code> try to match the values of the associative array rather than its
|
|
keys. These, too, print out the actual part matched, here the value,
|
|
unless you use the parameter flags.</p>
|
|
<pre><code> % print ${assoc[(r)*i*]}
|
|
third man
|
|
% print ${(k)assoc[(R)*i*]}
|
|
fred finnbar
|
|
</code></pre>
|
|
<p>There's one more pair of subscript flags of particular relevance to
|
|
associative arrays, <code>(k)</code> and <code>(K)</code>. These work a bit like a case
|
|
statement: the subscripts are treated as strings, and the keys of the
|
|
associative arrays as patterns, instead of the other way around. With
|
|
<code>(k)</code>, the value of the first key which matches the subscript is
|
|
substituted; with <code>(K)</code>, the values of all matching keys are substituted:</p>
|
|
<pre><code> % typeset -A assoc
|
|
% assoc=('[0-9]' digit '[a-zA-Z]' letter '[^0-9a-zA-Z]' neither)
|
|
% print ${assoc[(k)0]}
|
|
digit
|
|
% print ${assoc[(k)_]}
|
|
neither
|
|
</code></pre>
|
|
<p>In case you're still confused, the `<code>0</code>' in the first subscript was
|
|
taken as a string and all the keys in <code>$assoc</code> were treated as patterns
|
|
in turn, a little like</p>
|
|
<pre><code> case 0 in
|
|
([0-9]) print digit
|
|
;;
|
|
([a-zA-Z]) print letter
|
|
;;
|
|
([^0-9a-zA-Z]) print neither
|
|
;;
|
|
esac
|
|
</code></pre>
|
|
<p>One important way in which this is <em>not</em> like the selection in a case
|
|
statement is that you can't rely on the order of the comparison, so you
|
|
can't rely on more general patterns being matched after more specific
|
|
ones. You just have to use keys which are sufficiently explicit to match
|
|
just the strings you want to match and no others. That's why we picked
|
|
the pattern `<code>[^0-9a-zA-Z]</code>' instead of just `<code>*</code>' as we would
|
|
probably have used in the case statement.</p>
|
|
<p>I said storing information about configuration was a common use of
|
|
associative arrays, but the shell has a more powerful way of doing that:
|
|
styles, which will figure prominently in the discussion of programmable
|
|
completion in the next chapter. The major advantage of styles over
|
|
associative arrays is that they can be made context-sensitive; you can
|
|
easily make the same style return the same value globally, or make it
|
|
have a default but with a different value in one particular context, or
|
|
give it a whole load of different values in different places. Each shell
|
|
application can decide what is meant by a `context'; you are not tied
|
|
to the same scheme as the completion system uses, or anything like it.
|
|
Use of hierarchical contexts in the manner of the completion system does
|
|
mean that it is easy to create sets of styles for different modules
|
|
which don't clash.</p>
|
|
<p>Here, finally, is a comparison of some of the uses of associative arrays
|
|
in perl and zsh.</p>
|
|
<pre><code> perl zsh
|
|
-----------------------------------------------------------------
|
|
%hash = qw(key value); typeset -A hash; hash=(key value)
|
|
$hash{key} ${hash[key]}
|
|
keys %hash ${(k)hash}
|
|
values %hash ${(v)hash}
|
|
%hash2 = %hash; typeset -A hash2; hash2=("${(@kv)hash}")
|
|
undef %hash;                  unset hash
|
|
if (exists $hash{key}) { if (( ${+hash[key]} )); then
|
|
... ...
|
|
} fi
|
|
</code></pre>
|
|
<p>One final reminder: if you are creating associative arrays inside a
|
|
function which need to last beyond the end of the function, you should
|
|
create them with `<code>typeset -gA</code>' which puts them into the surrounding
|
|
scope. The `<code>-g</code>' flag is of course useful with all types of parameter,
|
|
but the associative array is the only type that doesn't automatically
|
|
spring into existence when you assign to it in the right context; hence
|
|
the flag is particularly worthy of note here.</p>
|
|
<p><span id="l123"></span></p>
|
|
<h3 id="543-substituted-substitutions-top--and-tailing-etc"><a class="header" href="#543-substituted-substitutions-top--and-tailing-etc">5.4.3: Substituted substitutions, top- and tailing, etc.</a></h3>
|
|
<p>There are many transformations which you can do on the result of a
|
|
parameter substitution. The most powerful involve the use of patterns.
|
|
For this, the more you know about patterns, the better, so I will
|
|
reserve explanation of some of the whackiest until after I have gone
|
|
into more detail on patterns. In particular, it's useful if you know how
|
|
to tell the shell to mark subexpressions which it has matched for future
|
|
extraction. However, you can do some very useful things with just the
|
|
basic patterns common to all shells.</p>
|
|
<p><strong>Standard forms: lengths</strong></p>
|
|
<p>I'll separate out zsh-specific forms, and start off with some which
|
|
appear in all shells derived from the Bourne shell. A more compact
|
|
(read: terse) list is given in the manual, as always.</p>
|
|
<p>A few simple forms don't use patterns. First, the substitution
|
|
<code>${#</code><em>param</em><code>}</code> outputs the length of <code>$</code><em>param</em>. In zsh, you don't need
|
|
the braces here, though in most other shells with this feature you do.
|
|
Note that <code>${#}</code> on its own is the number of parameters in the command
|
|
line argument array, which is why explicit use of braces is clearer.</p>
|
|
<p><code>$#</code> works differently on scalar values and array values; in the former
|
|
case, it gives the length in characters, and in the latter case the
|
|
length in elements. Note that I said `values', not `parameters' ---
|
|
you have to work out whether the substitution is giving you a scalar or
|
|
an array:</p>
|
|
<pre><code> % print ${#path}
|
|
8
|
|
% print ${#path[1]}
|
|
13
|
|
</code></pre>
|
|
<p>The first result shows I have 8 directories in my path, the latter that
|
|
the first directory (actually `<code>/home/pws/bin</code>') has 13 characters. You
|
|
should bear this in mind with nested substitutions, as discussed below,
|
|
which can also return either an array or a scalar.</p>
|
|
<p>Earlier versions of zsh always returned a character count if the
|
|
expression was in double quotes, or anywhere the shell evaluated the
|
|
expression as a single word, but that doesn't happen any more; it
|
|
depends only on the type of the value. However, you can force the shell
|
|
to count characters by using the <code>(c)</code> flag, and to count words (even in
|
|
scalars, which it will split if necessary) by using <code>(w)</code>:</p>
|
|
<pre><code> % print ${#PATH}
|
|
84
|
|
% print ${(c)#path}
|
|
84
|
|
% foo="three scalar words"
|
|
% print ${(w)#foo}
|
|
3
|
|
</code></pre>
|
|
<p>Comparing the first two, you will see that character count with arrays
|
|
includes the space used for separating (equal to the number of colons
|
|
separating the elements in <code>$PATH</code>). There's a relative of <code>(w)</code> called
|
|
<code>(W)</code>, which treats multiple word separators as having zero-length words
|
|
in between:</p>
|
|
<pre><code> % foo="three  well-spaced  word"
|
|
% print ${(w)#foo}
|
|
3
|
|
% print ${(W)#foo}
|
|
5
|
|
</code></pre>
|
|
<p>giving two extra words over <code>(w)</code>, which treats the groups of spaces in
|
|
the same way as one. Being parameter flags, these modifications of the
|
|
syntax are specific to zsh.</p>
|
|
<p>Note that if you use lengths in an arithmetic context (inside <code>((...))</code>
|
|
or <code>$((...))</code>), you must include the leading `<code>$</code>', which you don't
|
|
need for substituting the parameters themselves. That's because
|
|
`<code>#foo</code>' means something different here --- the number in the ASCII
|
|
character set (or whatever extension of it you are using if it is an
|
|
extended character set) of the first character in <code>$foo</code>.</p>
|
|
<p><strong>Standard forms: conditional substitutions</strong></p>
|
|
<p>The next group of substitutions is a whole series where the parameter is
|
|
followed by an optional colon and then `<code>-</code>', `<code>=</code>', `<code>+</code>' or `<code>?</code>'.
|
|
The colon has the same effect in each case: without a colon, the shell
|
|
tests whether the parameter is set before performing the operation,
|
|
while with the colon it tests whether the parameter has non-zero length.</p>
|
|
<p>The simplest is `<code>${</code><em>param</em><code>:-</code><em>value</em><code>}</code>'. If <code>$param</code> has non-zero
|
|
length (without the colon, if it is set at all), use its value, else use
|
|
the <em>value</em> supplied. Suppose <code>$foo</code> wasn't set at the start of the
|
|
following (however unlikely that may seem):</p>
|
|
<pre><code> % print ${foo-bar}
|
|
bar
|
|
% foo=''
|
|
% print ${foo-bar}
|
|
|
|
% print ${foo:-bar}
|
|
bar
|
|
% foo='please no anything but bar'
|
|
% print ${foo:-bar}
|
|
please no anything but bar
|
|
</code></pre>
|
|
<p>It's more usual to use the form with the colon. One reason for that is
|
|
that in functions you will often create the parameter with a <code>typeset</code>
|
|
before using it, in which case it always exists, initially with zero
|
|
length, so that the other form would never use the default value. I'll
|
|
use the colon for describing the other three types.</p>
|
|
<p>`<code>${</code><em>param</em><code>:=</code><em>value</em><code>}</code>' is similar to the previous type, but in
|
|
this case the shell will not only substitute <em>value</em> into the line, it
|
|
will assign it to <em>param</em> if (and only if) it does so. This leads to the
|
|
following common idiom in scripts and functions:</p>
|
|
<pre><code> : ${MYPARAM:=default} ${OTHERPARAM:=otherdefault}
|
|
</code></pre>
|
|
<p>If the user has already set <code>$MYPARAM</code>, nothing happens, otherwise it
|
|
will be set to `<code>default</code>', and similarly for <code>${OTHERPARAM}</code>. The
|
|
`<code>:</code>' command does nothing but return true after the command line has
|
|
been processed.</p>
|
|
<p>`<code>${</code><em>param</em><code>:+</code><em>value</em><code>}</code>' is the opposite of `<code>:-</code>', logically
|
|
enough: the <em>value</em> is substituted if the parameter <em>doesn't</em> have zero
|
|
length. In this case, <em>value</em> will often be another parameter
|
|
substitution:</p>
|
|
<pre><code> print ${value:+"the value of value is $value"}
|
|
</code></pre>
|
|
<p>prints the string only if <code>$#value</code> is greater than zero. Note that what
|
|
can appear after the `<code>+</code>' is pretty much any single word the shell can
|
|
parse; all the usual single-word substitutions (so globbing is excluded)
|
|
will be applied to it, and quotes will work just the same as usual. This
|
|
applies to the values after `<code>:-</code>' and `<code>:=</code>', too. One other commonly
|
|
seen trick might be worth mentioning:</p>
|
|
<pre><code> print ${1+"$@"}
|
|
</code></pre>
|
|
<p>substitutes all the positional parameters as they were passed if the
|
|
first one was set (here you don't want the colon). This was necessary in
|
|
some old shells because <code>"$@"</code> on its own gave you a single empty
|
|
argument instead of no arguments when no arguments were passed. This
|
|
workaround isn't necessary in zsh, nor in most modern Bourne-derived
|
|
shells. There's a bug in zsh's handling, however; see the section on
|
|
function parameters in chapter 3.</p>
|
|
<p>The final type isn't that often used (meaning I never have):
|
|
<code>${</code><em>param</em><code>?</code><em>message</em><code>}</code> tests if <em>param</em> is set (no colon), and if it
|
|
isn't, prints the message and exits the shell. An interactive shell
|
|
won't exit, but it will return you immediately to the prompt, skipping
|
|
anything else stored up for execution. It's a rudimentary safety
|
|
feature, a little bit like `assert' in C programmes; most shell
|
|
programmers seem to cover the case of missing parameter settings by more
|
|
verbose tests. It's quite neat in short shell functions for interactive
|
|
use:</p>
|
|
<pre><code> mless() { mtype ${@:?missing filename} | $PAGER }
|
|
</code></pre>
|
|
<p><strong>Standard forms: pattern removal</strong></p>
|
|
<p>Most of the more sophisticated Bourne-like shells define two pairs of
|
|
pattern operators, which I shall call `top and tail' operators. One
|
|
pair (using `<code>#</code>' and `<code>##</code>') removes a given pattern from the head of
|
|
the string, returning the rest, while the other pair (using `<code>%</code>' and
|
|
`<code>%%</code>') removes a pattern from the tail of the string. In each case,
|
|
the form with one symbol removes the shortest matching pattern, while
|
|
the one with two symbols removes the longest matching pattern. Two
|
|
typical uses are:</p>
|
|
<pre><code> % print $HOME
|
|
/home/pws
|
|
% print ${HOME##*/}
|
|
pws
|
|
% print ${HOME%/*}
|
|
/home
|
|
</code></pre>
|
|
<p>which here have the same effect as <code>${HOME:t}</code> and <code>${HOME:h}</code>, and
|
|
in zsh you would be more likely to use the latter. However, as you can
|
|
see the pattern forms are much more general. Note the difference from:</p>
|
|
<pre><code> % print ${HOME#*/}
|
|
home/pws
|
|
% print ${HOME%%/*}
|
|
</code></pre>
|
|
<p>where the shortest match of `<code>*/</code>' at the head was just the first
|
|
slash, since `<code>*</code>' can match an empty string, while the longest match
|
|
of `<code>/*</code>' at the tail was the entire string, right back to the first
|
|
slash. Although these are standard forms, remember that the full power
|
|
of zsh patterns is available.</p>
|
|
<p>How do you remember which operator does what? The fact that the longer
|
|
form does the longer match is probably easy. Remembering that `<code>#</code>'
|
|
removes at the head and `<code>%</code>' at the tail is harder. Try to think of
|
|
`hash' and `head' (if you call it a `pound sign', when it's nothing
|
|
of the sort since a pound sign looks like `£', you will get no
|
|
sympathy from me), and `percent' and `posterior'. It never worked for
|
|
me, but maybe I just don't have the mental discipline. Oliver Kiddle
|
|
points out that `<code>#</code>' is further to the left (head) on a standard US
|
|
keyboard. On my UK keyboard, `<code>#</code>' is right next to the return key,
|
|
unfortunately, although here the confusion with `pound sign' will jog
|
|
your memory.</p>
|
|
<p>The most important thing to remember is: this notation is not our fault.
|
|
Sorry, anyway. By the way, notice there's no funny business with colons
|
|
in the case of the pattern operators. (Well --- except for the zsh
|
|
variant noted below.)</p>
|
|
<p><strong>Zsh-specific parameter substitutions</strong></p>
|
|
<p>Now for some enhancements that zsh has for using the forms of parameter
|
|
substitution I've just given as well as some similar but different ones.</p>
|
|
<p>One simple enhancement is that in addition to
|
|
`<code>${</code><em>param</em><code>=</code><em>value</em><code>}</code>' and `<code>${</code><em>param</em><code>:=</code><em>value</em><code>}</code>', zsh has
|
|
`<code>${</code><em>param</em><code>::=</code><em>value</em><code>}</code>' which performs an unconditional assignment
|
|
as well as sticking the value on the command line. It's not really any
|
|
different from using a normal assignment, then a normal parameter
|
|
substitution, except that zsh users like densely packed code.</p>
|
|
<p>All the assignment types are affected by the parameter flags `<code>A</code>' and
|
|
`<code>AA</code>' which tell the shell to perform array and associative array
|
|
assignment (in the second case, you need pairs of key/value elements as
|
|
usual). You need to be a little bit careful with array elements and word
|
|
splitting, however:</p>
|
|
<pre><code> % print -l ${(A)foo::=one two three four}
|
|
one two three four
|
|
% print ${#foo}
|
|
1
|
|
</code></pre>
|
|
<p>That made <code>$foo</code> an array all right, but treated the argument as a
|
|
scalar value and assigned it to the first element. There's a way round
|
|
this:</p>
|
|
<pre><code> % print -l ${(A)=foo::=one two three four}
|
|
one
|
|
two
|
|
three
|
|
four
|
|
% print ${#foo}
|
|
4
|
|
</code></pre>
|
|
<p>Here, the `<code>=</code>' <em>before</em> the parameter name has a completely different
|
|
effect from the others: it turns on word-splitting, just as if the
|
|
option <code>SH_WORD_SPLIT</code> is in effect. You may remember I went into this
|
|
in appalling detail in the section `Function parameters' in <a href="zshguide03.html#syntax">chapter
|
|
3</a>.</p>
|
|
<p>You should be careful, however, as more sophisticated attempts at
|
|
putting arrays inside parameter values can easily lead you astray. It's
|
|
usually much easier to use the `<em>array</em><code>=</code>(<em>...</em>)' or `<code>set -A</code> <em>...</em>'
|
|
notations.</p>
|
|
<p>One extremely useful zsh enhancement is the notation `<code>${+foo}</code>' which
|
|
returns 1 if <code>$foo</code> is set and 0 if it isn't. You can use this in
|
|
arithmetic expressions. This is a much more flexible way of dealing with
|
|
possibly unset parameters than the more standard `<code>${foo?goodbye}</code>'
|
|
notation, and consequently is better used by zsh programmers. The
|
|
notation `plus foo' for `foo is set' should be fairly memorable, too.
|
|
A more standard way of doing this (noted by David Korn) is
|
|
`<code>0${foo+1}</code>', giving 0 if <code>$foo</code> is not set and 01 if it is.</p>
|
|
<p><strong>Parameter flags and pattern substitutions</strong></p>
|
|
<p>Zsh increases the usefulness of the `top and tail' operators with some
|
|
of its parameter flags. Usually these show you what's left after the
|
|
removal of some matched portion. However, with the flag <code>(M)</code> the shell
|
|
will instead show you the matched portion itself. The flag <code>(R)</code> is the
|
|
opposite and shows the rest: that's not all that useful in the normal
|
|
case, since you get that by default. It only starts being useful when
|
|
you combine it with other flags.</p>
|
|
<p>Next, zsh allows you to match on substrings, not just on the head or
|
|
tail. You can do this by giving the flag <code>(S)</code> with either of the `<code>#</code>'
|
|
or `<code>%</code>' pattern-matching forms. The difference here is whether the
|
|
shell starts searching for a matching substring at the start or end of
|
|
the full string. Let's take</p>
|
|
<pre><code> foo='where I was huge lizards walked here and there'
|
|
</code></pre>
|
|
<p>and see what we get matching on `<code>h*e</code>':</p>
|
|
<pre><code> % print -l ${(S)foo#h*e} ${(S)foo##h*e} ${(S)foo%h*e} ${(S)foo%%h*e}
|
|
wre I was huge lizards walked here and there
|
|
w
|
|
where I was huge lizards walked here and tre
|
|
where I was huge lizards walked here and t
|
|
</code></pre>
|
|
<p>There are some odd discrepancies at first sight, but here's what
|
|
happens. In the first case, `<code>#</code>', the shell looks forward until it
|
|
finds a match for `<code>h*e</code>', and takes the shortest, which is the `<code>he</code>'
|
|
in the first word. With `<code>##</code>', the match succeeds at the same point,
|
|
but the longest match extends to the `<code>e</code>' right at the end of the
|
|
string. With the other two forms, the shell starts scanning backwards
|
|
from the end, and stops as soon as it reaches a starting point which has
|
|
a match. For both `<code>%</code>' and `<code>%%</code>' this is the last `<code>h</code>', but the
|
|
former matches `<code>he</code>' and the latter matches `<code>here</code>'.</p>
|
|
<p>You can extend this by using the <code>(I)</code> flag to specify a numeric index.
|
|
The index needs to be delimited, conventionally, although not
|
|
necessarily, by colons. The shell will then scan forward or backward,
|
|
depending on the form used, until it has found the <code>(I)</code>'th match. Note
|
|
that it only ever counts a single match from each position, either the
|
|
longest or the shortest, so the <code>(I)</code>'th match starts from the <code>(I)</code>'th
|
|
position which has any match. Here's what happens when we remove all the
|
|
matches for `<code>#</code>' using the example above.</p>
|
|
<pre><code> % for (( i = 1; i <= 5; i++ )); do
|
|
for> print ${(SI:$i:)foo#h*e}
|
|
for> done
|
|
wre I was huge lizards walked here and there
|
|
where I was lizards walked here and there
|
|
where I was huge lizards walked re and there
|
|
where I was huge lizards walked here and tre
|
|
where I was huge lizards walked here and there
|
|
</code></pre>
|
|
<p>Each time we match and remove one of the possible `<code>h*e</code>' sets where
|
|
there is no `<code>e</code>' in the middle, moving from left to right. The last
|
|
time there was nothing left to match and the complete string was
|
|
returned. Note that the index we used was itself a parameter.</p>
|
|
<p>It's obvious what happens with `<code>##</code>': it will find matches at all the
|
|
same points, but they will all extend to the `<code>e</code>' at the end of the
|
|
string. It's probably less obvious what happens with `<code>%%</code>' and `<code>%</code>',
|
|
but if you try it you will find they produce just the same set of
|
|
matches as `<code>##</code>' and `<code>#</code>', respectively, but with the indices in the
|
|
reverse order (4 for 1, 3 for 2, etc.).</p>
|
|
<p>You can use the `<code>M</code>' flag to leave the matched portion rather than the
|
|
rest of the string, if you like. There are three other flags which let
|
|
you get the indices associated with the match instead of the string:
|
|
<code>(B)</code> for the beginning, using the usual zsh convention where the first
|
|
character is 1, <code>(E)</code> for the character <em>after</em> the end, and <code>(N)</code> for
|
|
the length, simply <code>E-B</code>. You can even have more than one of these; the
|
|
value substituted is a string with the given values with spaces between,
|
|
always in the order beginning, end, length.</p>
|
|
<p>There is a sort of opposite to the `<code>(S)</code>' flag, which instead of
|
|
matching substrings will only match the whole string; to do this, put a
|
|
colon before the `<code>#</code>'. Hence:</p>
|
|
<pre><code> % print ${foo:#w*g}
|
|
where I was huge lizards walked here and there
|
|
% print ${foo:#w*e}
|
|
|
|
%
|
|
</code></pre>
|
|
<p>The first one didn't match, because the `<code>g</code>' is not at the end; the
|
|
second one did, because there is an `<code>e</code>' at the end.</p>
|
|
<p><strong>Pattern replacement</strong></p>
|
|
<p>The most powerful of the parameter pattern-matching forms has been
|
|
borrowed from bash and ksh93; it doesn't occur in traditional Bourne
|
|
shells. Here, you use a pair of `<code>/</code>'s to indicate a pattern to be
|
|
replaced, and its replacement. Let's use the lizards again:</p>
|
|
<pre><code> % print ${foo/h*e/urgh}
|
|
wurgh
|
|
</code></pre>
|
|
<p>A bit incomprehensible: that's because like most pattern matchers it
|
|
takes the longest match unless told otherwise. In this case the <code>(S)</code>
|
|
flag has been pressed into service to mean not a substring (that's
|
|
automatic) but the shortest match:</p>
|
|
<pre><code> % print ${(S)foo/h*e/urgh}
|
|
wurghre I was huge lizards walked here and there
|
|
</code></pre>
|
|
<p>That only replaces the first match. This is where `<code>//</code>' comes in; it
|
|
replaces every match:</p>
|
|
<pre><code> % print ${(S)foo//h*e/urgh}
|
|
wurghre I was urgh lizards walked urghre and turghre
|
|
</code></pre>
|
|
<p>(No doubt you're starting to feel like a typical anachronistic Hollywood
|
|
cave-dweller already.) Note the syntax: it's a little bit like
|
|
substitution in <code>sed</code> or perl, but there's no slash at the end, and with
|
|
`<code>//</code>' only the first slash is doubled. It's a bit confusing that with
|
|
the other pattern expressions the single and double forms mean the
|
|
shortest and longest match, while here it's the flag <code>(S)</code> that makes
|
|
the difference.</p>
|
|
<p>The index flag <code>(I)</code> is useful here, too. In the case of `<code>/</code>', it
|
|
tells the shell which single match to substitute, and in the case of
|
|
`<code>//</code>' it tells the shell at which match to start: all matches starting
|
|
from that are replaced.</p>
|
|
<p>Overlapping matches are never replaced by `<code>//</code>'; once it has put the
|
|
new text in for a match, that section is not considered further and the
|
|
text just to its right is examined for matches. This is probably
|
|
familiar from other substitution schemes.</p>
|
|
<p>You may well be thinking `wouldn't it be good to be able to use the
|
|
matched text, or some part of it, in the replacement text?' This is what
|
|
you can do in sed with `<code>\1</code>' or `<code>\&</code>' and in perl with `<code>$1</code>' and
|
|
`<code>$&</code>'. It turns out this <em>is</em> possible with zsh, due to part of the
|
|
more sophisticated pattern matching features. I'll talk about this when
|
|
we come on to patterns, since it's not really part of parameter
|
|
substitution, although it's designed to work well with that.</p>
|
|
<p><span id="l124"></span></p>
|
|
<h3 id="544-flags-for-options-splitting-and-joining"><a class="header" href="#544-flags-for-options-splitting-and-joining">5.4.4: Flags for options: splitting and joining</a></h3>
|
|
<p>There are three types of flag that don't look like flags, for historical
|
|
reasons; you've already seen them in <a href="zshguide03.html#syntax">chapter
|
|
3</a>. The first is the one that turns on the
|
|
<code>SH_WORD_SPLIT</code> option, <code>${=foo}</code>. Note that you can mix this with flags
|
|
that <em>do</em> look like flags, in parentheses, in which case the `<code>=</code>' must
|
|
come after the closing parenthesis. You can force the option to be
|
|
turned off for a single substitution by doubling the symbol:
|
|
`<code>${==foo}</code>'. However, you wouldn't do that unless the option was
|
|
already set, in which case you are probably trying to be compatible with
|
|
some other shell, and wouldn't want to use that form.</p>
|
|
<p>More control over splitting and joining is possible with three of the
|
|
more standard type of flags, <code>(s)</code>, <code>(j)</code> and <code>(z)</code>. These do splitting
|
|
on a given string, joining with a given string, and splitting just the
|
|
way the shell does it, respectively. In the first two cases, you need to
|
|
specify the string in the same way as you specified the index for the
|
|
<code>(I)</code> flag. So, for example, here's how to turn <code>$PATH</code> into an ordinary
|
|
array without using <code>$path</code>:</p>
|
|
<pre><code> % print -l ${(s.:.)PATH}
|
|
/home/pws/bin
|
|
/usr/local/bin
|
|
/usr/sbin
|
|
/sbin
|
|
/bin
|
|
/usr/bin
|
|
/usr/X11R6/bin
|
|
/usr/games
|
|
</code></pre>
|
|
<p>Any character can follow the <code>(s)</code> or <code>(j)</code>; the string argument lasts
|
|
until the matching character, here `<code>.</code>'. If the character is one of
|
|
the bracket-like characters including `<code><</code>', the `matching' character
|
|
is the corresponding right bracket, e.g. `<code>${(s<:>)PATH}</code>' and
|
|
`<code>${(s(:))PATH}</code>' are both valid. This applies to all flags that need
|
|
arguments, including <code>(I)</code>.</p>
|
|
<p>Although the split or join string isn't a pattern, it doesn't have to be
|
|
a single character:</p>
|
|
<pre><code> % foo=(array of words)
|
|
% print ${(j.**.)foo}
|
|
array**of**words
|
|
</code></pre>
|
|
<p>The <code>(z)</code> flag doesn't take an argument. As it handles splitting on the
|
|
full shell definition of a word, it goes naturally with quoted
|
|
expressions, and I discussed above its use with the <code>(Q)</code> flag for
|
|
extracting words from a line with the quotes removed.</p>
|
|
<p>It's possible for the same parameter expression to have both splitting
|
|
and joining applied to it. This always occurs in the same order,
|
|
regardless of how you specify the flags: joining first, then splitting.
|
|
This is described in the (rather hairy) complete set of rules in the
|
|
manual entry for parameter substitution. There are one or two occasions
|
|
where this can be a bit surprising. One is when you have <code>SH_WORD_SPLIT</code>
|
|
set and try to join a string:</p>
|
|
<pre><code> % setopt shwordsplit
|
|
% foo=('another array' of 'words with spaces')
|
|
% print -l ${(j.:.)foo}
|
|
another
|
|
array:of:words
|
|
with
|
|
spaces
|
|
</code></pre>
|
|
<p>You might not have noticed if you didn't use the `<code>-l</code>' option to print,
|
|
but the spaces still caused word-splitting even though you asked for the
|
|
array to be joined with colons. To avoid this, either don't use
|
|
<code>SH_WORD_SPLIT</code> (my personal preference), or use quotes:</p>
|
|
<pre><code> % print -l "${(j.:.)foo}"
|
|
another array:of:words with spaces
|
|
</code></pre>
|
|
<p>The elements of an array would normally be joined by spaces in this
|
|
case, but the character specified by the <code>(j)</code> flag takes precedence. In
|
|
just the same way, if <code>SH_WORD_SPLIT</code> is in effect, any splitting string
|
|
given by <code>(s)</code> is used instead of the normal set of characters, which
|
|
are any characters that occur in the string <code>$IFS</code>, by default space,
|
|
tab, newline and NUL.</p>
|
|
<p>Specifying a split for a particular parameter substitution not only sets
|
|
the string to split on, but also ensures the split will take place even
|
|
if the expression is quoted:</p>
|
|
<pre><code> % array=('element one' 'element two' 'element three')
|
|
% print -l "${=array}"
|
|
element
|
|
one
|
|
element
|
|
two
|
|
element
|
|
three
|
|
</code></pre>
|
|
<p>To be clear about what's happening here: the quotes force the elements
|
|
to be joined with spaces, giving a single string, which is then split on
|
|
the original spaces as well as the one used to join the elements of the
|
|
array.</p>
|
|
<p>I will talk shortly about nested parameter substitution; you should also
|
|
note that splitting and joining will if necessary take place at all
|
|
levels of a nested substitution, not just the outermost one:</p>
|
|
<pre><code> % foo="three blind words"
|
|
% print ${#${(z)foo}}
|
|
3
|
|
</code></pre>
|
|
<p>This prints the length of the innermost expression; because of the
|
|
zplit, that has produced a three-element array.</p>
|
|
<p><span id="l125"></span></p>
|
|
<h3 id="545-flags-for-options-glob_subst-and-rc_expand_param"><a class="header" href="#545-flags-for-options-glob_subst-and-rc_expand_param">5.4.5: Flags for options: <code>GLOB_SUBST</code> and <code>RC_EXPAND_PARAM</code></a></h3>
|
|
<p>The other two flags that don't use parentheses affect options for single
|
|
substitutions, too. The second is the `<code>~</code>' flag that turns on
|
|
<code>GLOB_SUBST</code>, making the result of a parameter substitution eligible for
|
|
pattern matching. As the notation is supposed to indicate, it also makes
|
|
filename expansion possible, so</p>
|
|
<pre><code> % foo='~'
|
|
% print ${~foo}
|
|
/home/pws
|
|
</code></pre>
|
|
<p>It's that first `<code>~</code>' which is giving the home directory; the one in
|
|
the parameter expansion simply allows that to happen. If you have
|
|
<code>GLOB_SUBST</code> set, you can use `<code>${~~foo}</code>' to turn it off for one
|
|
substitution.</p>
|
|
<p>There's one other of these option flags: `<code>^</code>' forces on
|
|
<code>RC_EXPAND_PARAM</code> for the current substitution, and `<code>^^</code>' forces it
|
|
off. In <a href="zshguide03.html#syntax">chapter 3</a>, I showed how parameters
|
|
expanded with this option on fitted in with brace expansions.</p>
|
|
<p><span id="l126"></span></p>
|
|
<h3 id="546-yet-more-parameter-flags"><a class="header" href="#546-yet-more-parameter-flags">5.4.6: Yet more parameter flags</a></h3>
|
|
<p>Here are a few other parameter flags; I'm repeating some of these. A
|
|
very useful one is `<code>t</code>' to tell you the type of a parameter. This came
|
|
up in <a href="zshguide03.html#syntax">chapter 3</a> as well. Its most common use
|
|
is to test the basic type of the parameter before trying to use it:</p>
|
|
<pre><code> if [[ ${(t)myparam} != *assoc* ]]; then
|
|
# $myparam is not an associative array. Do something about it.
|
|
fi
|
|
</code></pre>
|
|
<p>Another very useful type is for left or right padding of a string, to a
|
|
specified length, and optionally with a specified fill string to use
|
|
instead of space; you can even specify a one-off string to go right next
|
|
to the string in question.</p>
|
|
<pre><code> foo='abcdefghij'
|
|
for (( i = 1; i <= 10; i++ )); do
|
|
goo=${foo[1,$i]}
|
|
print ${(l:10::X::Y:)goo} ${(r:10::X::Y:)goo}
|
|
done
|
|
</code></pre>
|
|
<p>prints out the rather pretty:</p>
|
|
<pre><code> XXXXXXXXYa aYXXXXXXXX
|
|
XXXXXXXYab abYXXXXXXX
|
|
XXXXXXYabc abcYXXXXXX
|
|
XXXXXYabcd abcdYXXXXX
|
|
XXXXYabcde abcdeYXXXX
|
|
XXXYabcdef abcdefYXXX
|
|
XXYabcdefg abcdefgYXX
|
|
XYabcdefgh abcdefghYX
|
|
Yabcdefghi abcdefghiY
|
|
abcdefghij abcdefghij
|
|
</code></pre>
|
|
<p>Note that those colons (which can be other characters, as I explained
|
|
for the <code>(s)</code> and <code>(j)</code> flags) always occur in pairs before and after
|
|
the argument, so that with three arguments, the colons in between are
|
|
doubled. You can miss out the `<code>:Y:</code>' part and the `<code>:X:</code>' part and
|
|
see what happens. The fill strings don't need to be single characters;
|
|
if they don't fit an exact number of times into the filler space, the
|
|
last repetition will be truncated on the end furthest from the parameter
|
|
argument being inserted.</p>
|
|
<p>Two flags tell the shell that you want something special done with
|
|
the value of the parameter substitution. The <code>(P)</code> flag forces the value
|
|
to be treated as a parameter name, so that you get the effect of a
|
|
double substitution:</p>
|
|
<pre><code> % final=string
|
|
% intermediate=final
|
|
% print ${(P)intermediate}
|
|
string
|
|
</code></pre>
|
|
<p>This is a bit as if <code>$intermediate</code> were what in ksh is called a
|
|
`nameref', a parameter that is marked as a reference to another
|
|
parameter. Zsh may eventually have those, too; there are places where
|
|
they are a good deal more convenient than the `<code>(P)</code>' flag.</p>
|
|
<p>A more powerful flag is <code>(e)</code>, which forces the value to be rescanned
|
|
for all forms of single-word substitution. For example,</p>
|
|
<pre><code> % foo='$(print $ZSH_VERSION)'
|
|
% print ${(e)foo}
|
|
4.0.2
|
|
</code></pre>
|
|
<p>made the value of <code>$foo</code> be re-examined, at which point the command
|
|
substitution was found and executed.</p>
|
|
<p>The remaining flags are a few simple special formatting tricks: order
|
|
array elements in normal lexical (character) order with <code>(o)</code>, order in
|
|
reverse order with <code>(O)</code>, do the same case-independently with <code>(oi)</code> or
|
|
<code>(Oi)</code> respectively, expand prompt `<code>%</code>'-escapes with <code>(%)</code> (easy to
|
|
remember), expand backslash escapes as <code>print</code> does with <code>(p)</code>, force all
|
|
characters to uppercase with <code>(U)</code> or lowercase with <code>(L)</code>, capitalise
|
|
the first character of the string or each array element with <code>(C)</code>, show
|
|
up special characters as escape sequences with <code>(V)</code>. That should be
|
|
enough to be getting on with.</p>
|
|
<p><span id="l127"></span></p>
|
|
<h3 id="547-a-couple-of-parameter-substitution-tricks"><a class="header" href="#547-a-couple-of-parameter-substitution-tricks">5.4.7: A couple of parameter substitution tricks</a></h3>
|
|
<p>I can't resist describing a couple of extras.</p>
|
|
<p>Zsh can do so much on parameter expressions that sometimes it's useful
|
|
even without a parameter! For example, here's how to get the length of
|
|
a fixed string without needing to put it into a parameter:</p>
|
|
<pre><code> % print ${#:-abcdefghijklm}
|
|
13
|
|
</code></pre>
|
|
<p>If the parameter whose name you haven't given has a zero length (it
|
|
does, because there isn't one), use the string after the `<code>:-</code>'
|
|
instead, and take its length. Note you need the colon, else you are
|
|
asking the shell to test whether a parameter is set, and it becomes
|
|
rather upset when it realises there isn't one to test. Other shells are
|
|
unlikely to tolerate any such syntactic outrages at all; the <code>#</code> in that
|
|
case is likely to be treated as <code>$#</code>, the number of shell arguments. But
|
|
zsh knows that's not going to have zero length, and assumes you know
|
|
what you're doing with the extra part; this is useful, but technically a
|
|
violation of the rules.</p>
|
|
<p>Sometimes you don't need anything more than the flags. The most useful
|
|
case is making the `fill' flags generate repeated words, with the
|
|
effect of perl's `<code>x</code>' operator (for those not familiar with perl, the
|
|
expression `<code>"string" x 3</code>' produces the string `stringstringstring').
|
|
Here, you need to remember that the fill width you specify is the total
|
|
width, not the number of repetitions, so you need to multiply it by the
|
|
length of the string:</p>
|
|
<pre><code> % print ${(l.18..string.)}
|
|
stringstringstring
|
|
</code></pre>
|
|
<p><span id="l128"></span></p>
|
|
<h3 id="548-nested-parameter-substitutions"><a class="header" href="#548-nested-parameter-substitutions">5.4.8: Nested parameter substitutions</a></h3>
|
|
<p>Zsh has a system for multiple nested parameter substitutions. Whereas in
|
|
most shells or other scripting languages you would do something like:</p>
|
|
<pre><code> % p=/directory/file.ext
|
|
% p2=${p##*/} # remove longest match of */ from head
|
|
% print $p2
|
|
file.ext
|
|
% print ${p%.*} # remove shortest match of .* from tail
|
|
file
|
|
</code></pre>
|
|
<p>in zsh you can do this in one substitution:</p>
|
|
<pre><code> % p=/directory/file.ext
|
|
% print ${${p##*/}%.*}
|
|
file
|
|
</code></pre>
|
|
<p>saving the temporary parameter in the middle. (Again, you are more
|
|
likely to use <code>${p:t:r}</code> in this particular case.) Where this becomes a
|
|
major advantage is with arrays: if <code>$p</code> is an array, all the
|
|
substitutions are applied to every element of the array:</p>
|
|
<pre><code> % p=(/dir1/file1.ext1 /dir2/file2.ext2)
|
|
% print ${${p##*/}%.*}
|
|
file1 file2
|
|
</code></pre>
|
|
<p>This can result in some considerable reductions in the code for
|
|
processing arrays. It's a way of getting round the fact that an ordinary
|
|
command line interface like zsh, designed originally for direct
|
|
interaction with the user, doesn't have all the sophistication of a
|
|
non-interactive language like perl, whose `<code>map</code>' function would
|
|
probably be the neatest way of doing the same thing:</p>
|
|
<pre><code> # Perl code.
|
|
@p = qw(/dir1/file1.ext1 /dir2/file2.ext2);
|
|
@q = map { m%^(?:.*/)(.*?)(?:\.[^.]*|)$%; } @p;
|
|
  print "@q\n";
|
|
</code></pre>
|
|
<p>or numerous possible variants. In a shell, there's no way of putting
|
|
functions like that into the command line without complicating the basic
|
|
`command, arguments' syntax; so we resort to trickery with
|
|
substitutions. Note, however, that this degree of brevity makes for a
|
|
certain lack of readability even in Perl. Furthermore, zsh is so
|
|
optimised for common cases that</p>
|
|
<pre><code> print ${p:t:r}
|
|
</code></pre>
|
|
<p>will work for both arrays and scalars: the <code>:t</code> takes only the tail of
|
|
the filename, stripping the directories, and the <code>:r</code> removes the
|
|
suffix. These two operators could have slightly unexpected effects in
|
|
versions of zsh before 4.0.1, removing `suffixes' which contained
|
|
directory paths, for example (though this is what the pattern forms
|
|
taken separately do, too).</p>
|
|
<p>Note one feature of the nested substitution: you might have expected the
|
|
`<code>${...}</code>' inside the other one to do a full parameter substitution, so
|
|
that the outer one would act on the value of that --- that's what you'd
|
|
get if the substitution was on its own, after all. However, that's not
|
|
what happens: the `<code>${...}</code>' inside is simply a syntactic trick to say
|
|
`here come more operations on the parameter'. This means that</p>
|
|
<pre><code> bar='this doesn'\''t get substituted'
|
|
foo='bar'
|
|
print ${${foo}}
|
|
</code></pre>
|
|
<p>simply prints `<code>bar</code>', not the value of <code>$bar</code>. This is the same case
|
|
we had before but without any of the extra `<code>##</code>' and `<code>%</code>' bits. The
|
|
reason is historical: when the extremely useful nested substitution
|
|
feature was added, it was much simpler to have the leading `<code>$</code>'
|
|
indicate to the shell that it should call the substitution function
|
|
again than find another syntax. You can make the value be re-interpreted
|
|
as another parameter substitution, using the <code>(P)</code> substitution flag
|
|
described above. Just remember that <code>${${foo}}</code> and <code>${(P)foo}</code> are
|
|
different.</p>
|
|
<p><span id="l129"></span></p>
|
|
<h2 id="55-that-substitution-again"><a class="header" href="#55-that-substitution-again">5.5: That substitution again</a></h2>
|
|
<p>Finally, here is a brief explanation of how to read the expression at
|
|
the top of the chapter. This is for advanced students only (nutcases, if
|
|
you ask me). You can find all the bits in the manual, if you try hard
|
|
enough, even the ones I didn't get around to explaining above. As an
|
|
example, let's suppose the array contains</p>
|
|
<pre><code> array=(long longer longest short brief)
|
|
</code></pre>
|
|
<p>and see what</p>
|
|
<pre><code> print ${array[(r)${(l.${#${(O@)array//?/X}[1]}..?.)}]}
|
|
</code></pre>
|
|
<p>gives.</p>
|
|
<ol>
|
|
<li>
|
|
<p>Always start from the inside. The innermost expression here is</p>
|
|
<pre><code> ${(O@)array//?/X}
|
|
</code></pre>
|
|
<p>Not much clearer? Start from the inside again: there's the parameter
|
|
we're operating on, whose name is <code>array</code>. Before that there are two
|
|
flags in parentheses: (<code>O</code>) says sort the result in descending
|
|
alphabetic order, (<code>@</code>) treat the result as an array, which is
|
|
necessary because this inner substitution occurs where a scalar
|
|
value (actually, an arithmetic expression) would usually occur, and
|
|
we need to take an array element. After the array name, `<code>//?/X</code>'
|
|
is a global substitution: take the pattern `<code>?</code>' (any character)
|
|
wherever it occurs, and replace it with the string `<code>X</code>'. The
|
|
result of this is an array like <code>$array</code>, but with all the elements
|
|
turned into strings consisting of `<code>X</code>'s in place of the original
|
|
characters, and with the longest first, because that's how reverse
|
|
alphabetic order works for strings with the same character. So</p>
|
|
<pre><code> long longer longest short brief
|
|
</code></pre>
|
|
<p>would have become</p>
|
|
<pre><code> XXXXXXX XXXXXX XXXXX XXXXX XXXX
|
|
</code></pre>
|
|
</li>
|
|
<li>
|
|
<p>Next, we have `<code>${#</code><em>result</em><code>[1]}</code>' wrapped around that. That means
|
|
that we take the first element of the array we arrived at above (the
|
|
`<code>[1]</code>': that's why we had to make sure it was treated as an
|
|
array), and then take the length of that (the `<code>#</code>'). We will end
|
|
up in this case with 7, the length of the first (and longest)
|
|
element. We're finally getting somewhere.</p>
|
|
</li>
|
|
<li>
|
|
<p>The next step is the `<code>${</code>(<code>l.</code><em>result</em><code>..?.</code>)<code>}</code>'. Our previous
|
|
<em>result</em> appears as an argument to the `<code>(l)</code>' flag of the
|
|
substitution. That's a rather special case of nested substitution:
|
|
at this point, the shell expects an arithmetical expression, giving
|
|
the minimum length of a string to be filled on the left. The
|
|
previous substitution was evaluated because arithmetic expressions
|
|
undergo parameter substitution. So it is the result of that, 7,
|
|
which appears here, giving the more manageable</p>
|
|
<pre><code> ${(l.7..?.)}
|
|
</code></pre>
|
|
<p>The expression for the `<code>(l)</code>' flag in full says `fill the result
|
|
of this parameter substitution to a minimum width of 7 using the
|
|
fill character `<code>?</code>''. What is the substitution we are filling? It's
|
|
empty: zsh is smart enough to assume you know what you're doing when
|
|
you don't give a parameter name, and just puts in an empty string
|
|
instead. So the empty string is filled out to length 7 with question
|
|
marks, giving `<code>???????</code>'.</p>
|
|
</li>
|
|
<li>
|
|
<p>Now we have `<code>${array[(r)???????]}</code>'. It may not be obvious
|
|
(congratulations if the rest is), but the question marks are active
|
|
as a pattern. Subscripts are treated specially in this respect. The
|
|
subscript flag `<code>(r)</code>' means `reverse match', not reverse as in
|
|
backwards, but as in the opposite way round: search the array itself
|
|
for a matching value, rather than taking this as an index. The only
|
|
thing that will match this is a string of length 7. Bingo! that
|
|
must be the element `longest' in this case. If there were other
|
|
elements of the same length, you would only get the first of that
|
|
length; I haven't thought of a way of getting all the elements of
|
|
that length substituted by a single expression without turning
|
|
<code>$array</code> into an associative array, so if you have, you should feel
|
|
smug.</p>
|
|
</li>
|
|
</ol>
|
|
<p>After I wrote this, Sven Wischnowsky (who is responsible for a large
|
|
fraction of the similar hieroglyphics in the completion functions)
|
|
pointed out that a similar way of achieving this is:</p>
|
|
<pre><code> print ${(M)array:#${~${(O@)array//?/?}[1]}}
|
|
</code></pre>
|
|
<p>which does indeed show all the elements of the maximum length. A brief
|
|
summary of how this works is that the innermost expression produces an
|
|
array of `<code>?</code>' corresponding to the elements, longest first in the way
|
|
we did above, turning the `<code>?</code>' into pattern match characters. The next
|
|
expansion picks the longest. Finally, the outermost expansion goes
|
|
through <code>$array</code> to find elements which match the complete string of
|
|
`<code>?</code>' and selects out those that do match.</p>
|
|
<p>If you are wondering about how to do that in perl in a single
|
|
expression, probably sorting on length is the easiest:</p>
|
|
<pre><code> # Perl code
|
|
@array = qw(long longer longest short brief);
|
|
@array = sort { length $b <=> length $a } @array;
|
|
</code></pre>
|
|
<p>and taking out the first element or first few elements of <code>@array</code>.
|
|
However, in a highly-optimized scripting language you would almost
|
|
certainly do it some other way: for example, avoid sorting and just
|
|
remember the longest element:</p>
|
|
<pre><code> # Perl code
|
|
$elt = '';
|
|
$l = 0;
|
|
foreach (@array) {
|
|
$newl = length $_;
|
|
$elt = $_, $l = $newl if $newl > $l;
|
|
}
|
|
print $elt, "\n";
|
|
</code></pre>
|
|
<p>You can do just the same thing in zsh easily enough in this case;</p>
|
|
<pre><code> local val elt
|
|
integer l newl
|
|
for val in $array; do
|
|
newl=${#val}
|
|
if (( newl > l )); then
|
|
elt=$val
|
|
(( l = newl ))
|
|
fi
|
|
done
|
|
print $elt
|
|
</code></pre>
|
|
<p>so this probably isn't a particularly good use for nested substitution,
|
|
even though it illustrates its power.</p>
|
|
<p>If you enjoyed that expression, there are many more like it in the
|
|
completion function suite for you to goggle at.</p>
|
|
<p><span id="l130"></span></p>
|
|
<h2 id="56-arithmetic-expansion"><a class="header" href="#56-arithmetic-expansion">5.6: Arithmetic Expansion</a></h2>
|
|
<p>Performing mathematics within the shell was first described in <a href="zshguide03.html#syntax">chapter
|
|
3</a> where I showed how to create numeric
|
|
parameters with variants of `<code>typeset</code>', and said a little about
|
|
arithmetic substitution.</p>
|
|
<p>In addition to the math library, loadable with `<code>zmodload zsh/mathfunc</code>', zsh has essentially all the operators you expect from C
|
|
and other languages derived from it. In other words, things like</p>
|
|
<pre><code> (( foo = bar ? 3 : 1, ++brr ))
|
|
</code></pre>
|
|
<p>are accepted. The comma operator works just as in C; all the arguments
|
|
are evaluated, in this case `<code>foo = bar ? 3 : 1</code>' assigns 3 or 1 to
|
|
<code>$foo</code> depending whether or not <code>bar</code> is non-zero, and then <code>$brr</code> is
|
|
incremented by 1. The return status is determined by the final
|
|
expression, so if <code>$brr</code> is zero after increment the return status is
|
|
one, else it is zero (integers may be negative).</p>
|
|
<p>One extra operator has been borrowed from FORTRAN, or maybe Perl, the
|
|
exponentiation operator, `<code>**</code>'. This can take either integers or
|
|
floating point numbers, though a negative exponent will cause a floating
|
|
point number to be returned, so `<code>$(( 2 ** -1 ))</code>' gives you 0.5, not
|
|
rounded down to zero. This is why the standard library function <code>pow</code> is
|
|
missing from <code>zsh/mathfunc</code> --- it's already there in that other form.
|
|
Pure integer exponentiation, however, is done by repeated multiplication
|
|
--- up to arbitrary sizes, so instead of `<code>2 ** 100</code>', you should use
|
|
`<code>1 << 100</code>', and for powers of any other integer where you don't need
|
|
an exact result, you should use floating point numbers. For this
|
|
purpose, the <code>zsh/mathfunc</code> library makes `casts' available;
|
|
`<code>float</code>(<em>num</em>)' forces the expression <em>num</em> to be interpreted as a
|
|
floating point number, whatever it would otherwise have given, although
|
|
the trick of adding `<code>0.0</code>' to a number works as well. Note that,
|
|
although this works like a cast in C, the syntax is that of an ordinary
|
|
function call. Likewise, `<code>int</code>(<em>num</em>)' causes the number to be
|
|
interpreted as an integer --- rounding towards zero; you can use <code>floor</code>
|
|
and <code>ceil</code> to round down or up, and <code>rint</code> to round to the nearest
|
|
integer, although these three actually produce floating point numbers.
|
|
They are standard C library functions.</p>
|
|
<p>For completeness, the assignment form of exponentiation `<code>**=</code>' also
|
|
works. I can't remember ever using it.</p>
|
|
<p>The range of integers depends on how zsh was configured on your machine.
|
|
The primary goal is to make sure integers are large enough to represent
|
|
indexes into files; on some systems where the hardware usually deals
|
|
with 32-bit integers, file sizes may be given by 64-bit integers, and
|
|
zsh will try to use 64-bit integers as well. However, zsh will test for
|
|
large integers even if no large file support is available; usually it
|
|
just requires that your compiler has some easy to recognise way of
|
|
defining 64-bit integers, such as `<code>long long</code>' which may be handled by
|
|
gcc even if it isn't by the native compiler. You can easily test; if
|
|
your zsh supports 64-bit integers, the largest available integer is:</p>
|
|
<pre><code> % print $(( 0x7FFFFFFFFFFFFFFF ))
|
|
9223372036854775807
|
|
</code></pre>
|
|
<p>and if you try adding something positive to that, you will get a
|
|
negative result due to two's complement arithmetic. This should be large
|
|
enough to count most things.</p>
|
|
<p>The range of floating point numbers is always that of a C `<code>double</code>',
|
|
which is usually also 64 bits, and internally the number is highly
|
|
likely to be in the IEEE standard form, which also affects the precision
|
|
and range you can get, though that's system specific, too. On most
|
|
systems, the math library functions handle <code>double</code>s rather than single
|
|
precision <code>float</code>s, so this is the natural choice. The cast function is
|
|
called `<code>float</code>' because, unlike C, the representation of a floating
|
|
point number is chosen for you, so the generic name is used.</p>
|
|
<p><span id="l131"></span></p>
|
|
<h3 id="561-entering-and-outputting-bases"><a class="header" href="#561-entering-and-outputting-bases">5.6.1: Entering and outputting bases</a></h3>
|
|
<p>I'll say a word or two about bases. I already said you could enter a
|
|
number with any small base in a form like `<code>2#101010</code>' or `<code>16#ffff</code>',
|
|
and that the latter could also be `<code>0xffff</code>' as in C. You can't,
|
|
however, enter octal numbers just by using a leading `<code>0</code>', which you
|
|
might expect from C. Here's an example of why not. Let's set:</p>
|
|
<pre><code> % foo=${(%):-%D}
|
|
% print $foo
|
|
01-08-06
|
|
</code></pre>
|
|
<p>The first line is another of those bogus parameter substitutions where
|
|
we gave it a literal string and a blank parameter. We also gave it the
|
|
flag `<code>(%)</code>', which forces prompt escapes to be expanded, and in
|
|
prompts `<code>%D</code>' is the date as <em>yy</em>-<em>mm</em>-<em>dd</em>. Let's write a short
|
|
program to find out what the date after <code>$foo</code> is. We have the luxury of
|
|
99 years to worry about the century wrapping, so we'll ignore it (and
|
|
the Gregorian calendar).</p>
|
|
<pre><code> mlens=(31 28 31 30 31 30 31 31 30 31 30 31)
|
|
date=(${(s.-.)foo}) # splits to array (01 08 06)
|
|
typeset -Z 2 incr
|
|
if (( ${date[3]} < ${mlens[${date[2]}]} )); then
|
|
# just increment day
|
|
(( incr = ${date[3]} + 1 ))
|
|
date[3]=$incr
|
|
else
|
|
# go to first of next month
|
|
date[3]=01
|
|
if (( ${date[2]} < 12 )); then
|
|
(( incr = ${date[2]} + 1 ))
|
|
date[2]=$incr
|
|
else
|
|
# happy new year
|
|
date[2]=01
|
|
(( incr = ${date[1]} + 1 ))
|
|
date[1]=$incr
|
|
fi
|
|
fi
|
|
print ${date[1]}-${date[2]}-${date[3]}
|
|
</code></pre>
|
|
<p>This will print `<code>01-08-07</code>'. Before I get to the point, various other
|
|
explanations. We forced <code>$foo</code> to be split on any `<code>-</code>' in it, giving a
|
|
three-part array. The next trick was `<code>typeset -Z 2 incr</code>', which tells
|
|
the shell that <code>$incr</code> is to be at least two characters, filled with
|
|
leading zeroes. That's how we got the `<code>07</code>' at the end, instead of
|
|
just `<code>7</code>'. There's another way of doing this: replace</p>
|
|
<pre><code>
|
|
typeset -Z 2 incr
|
|
(( incr = ${date[2]} + 1 ))
|
|
date[2]=$incr
|
|
</code></pre>
|
|
<p>with:</p>
|
|
<pre><code> date[2]=${(l.2..0.)$(( ${date[2]} + 1 ))}
|
|
</code></pre>
|
|
<p>This uses the <code>(l)</code> parameter flag to fill up to two characters with a
|
|
zero (the default is a space, so we need to specify the `<code>0</code>' this
|
|
time), using the fact that parameter operations can have a nested
|
|
<code>$</code>-substitution. This second form is less standard, however.</p>
|
|
<p>Now, finally, the point. In that `<code>$(( ${date[2]} + 1 ))</code>', the
|
|
`<code>${date[2]}</code>' is simply the <em>scalar</em> `<code>08</code>' --- the result of
|
|
splitting an arbitrary string into an array. Suppose we used leading
|
|
zeroes to signify octal numbers. We would get something like:</p>
|
|
<pre><code> % print $(( ${date[2]} + 1 ))
|
|
zsh: bad math expression: operator expected at `8 + 1 '
|
|
</code></pre>
|
|
<p>because the expression in the substitution becomes `<code>08 + 1</code>' and an 8
|
|
can't appear in an octal number. So we would have to strip off any
|
|
otherwise harmless leading zeroes. Parsing dates, or indeed strings with
|
|
leading zeroes as padding, is a fairly common thing for a shell to do,
|
|
and octal arithmetic isn't. So by default leading zeroes don't have that
|
|
effect.</p>
|
|
<p>However, there is an option you can set, <code>OCTAL_ZEROES</code>; this is
|
|
required for compatibility with the POSIX standard. That's how I got the
|
|
error message in the previous paragraph, in fact.</p>
|
|
<p>Floating point numbers are never octal, always decimal:</p>
|
|
<pre><code> % setopt octalzeroes
|
|
% print $(( 077 ))
|
|
63
|
|
% print $(( 077.43 ))
|
|
77.430000000000007
|
|
</code></pre>
|
|
<p>The other option to do with bases is <code>C_BASES</code>, which makes hexadecimal
|
|
(and, if you have <code>OCTAL_ZEROES</code> set, octal) numbers appear in the form
|
|
that you would use as input to a C (or, once again, Perl) program.</p>
|
|
<p>How do you persuade the shell to print out numbers in a particular base
|
|
anyway? There are two ways. The first is to associate a base with a
|
|
parameter, which you do with an argument after the `<code>-i</code>' option to
|
|
typeset:</p>
|
|
<pre><code> % typeset -i 16 hexnum=32
|
|
% print $hexnum
|
|
16#20
|
|
</code></pre>
|
|
<p>This is the standard way. By the way, there's a slight catch with bases,
|
|
taken over from ksh: if you <em>don't</em> specify a base, the first assignment
|
|
will do the job for you.</p>
|
|
<pre><code> % integer anynum
|
|
% (( anynum = 16#20 ))
|
|
% print $anynum
|
|
16#20
|
|
</code></pre>
|
|
<p>Only constants with explicit bases in an expression produce this effect;
|
|
the first time `<code>anynum</code>' comes into contact with a `<em>base</em><code>#</code><em>num</em>',
|
|
or a hexadecimal or (where applicable) octal expression in the standard
|
|
C form, it will acquire a default output base. So you need to use
|
|
`<code>typeset -i 10</code>' if you don't like that.</p>
|
|
<p>Often, however, you just want to print out an expression in, say,
|
|
hexadecimal. Zsh has a shorthand for this, which is only in recent
|
|
versions (and not in other shells). Preceding an expression by
|
|
`<code>[#</code><em>base</em><code>]</code>' causes the default output base to be set to <code>base</code> with
|
|
the usual prefix showing the base, and `<code>[##</code><em>base</em><code>]</code>' will do the
|
|
same but without the prefix, i.e. `<code>$(( [##16]255 ))</code>' is simply
|
|
`<code>FF</code>'. This has no effect on assignments to a parameter, not even on
|
|
the parameter's default output base, but it will affect the result of a
|
|
direct substitution using <code>$((...))</code>.</p>
|
|
<p><span id="l132"></span></p>
|
|
<h3 id="562-parameter-typing"><a class="header" href="#562-parameter-typing">5.6.2: Parameter typing</a></h3>
|
|
<p>Just as creating a parameter with an ordinary assignment makes it a
|
|
scalar, so creating it in an arithmetic substitution makes it either an
|
|
integer or a floating point parameter, according to the value assigned.
|
|
This is likely to be a floating point number if there was a floating
|
|
point number in the expression on the right hand side, and an integer
|
|
otherwise. However, there are reasons why a floating point number on the
|
|
right may not have this effect --- use of <code>int</code>, for example, since it
|
|
produces an integer.</p>
|
|
<p>However, relying on implicit typing in this fashion is bad. One of the
|
|
reasons is explained in the manual entry, and I can't do better than use
|
|
that example (since I wrote it):</p>
|
|
<pre><code> for (( f = 0; f < 1; f += 0.1 )); do
|
|
print $f
|
|
done
|
|
</code></pre>
|
|
<p>If you try this, and <code>$f</code> does not already exist, you will see an
|
|
endless stream of zeroes. What's happening is that the original
|
|
assignment creates <code>$f</code> as an integer to store the integer <code>0</code> in. After
|
|
printing this, <code>$f</code> is incremented by adding <code>0.1</code> to it. But once
|
|
created, <code>$f</code> remains an integer, so the resulting <code>0.1</code> is cast back to
|
|
an integer, and the resulting zero is stored back in <code>$f</code>. The result is
|
|
that <code>$f</code> is never incremented.</p>
|
|
<p>You could turn the first <code>0</code> into <code>0.0</code>, but a better way is to declare
|
|
`<code>float f</code>' before the loop. In a function, this also ensures <code>$f</code> is
|
|
local to the function.</p>
|
|
<p>If you use a scalar to store an integer or floating point, everything
|
|
will work. You don't have the problem just described, since although
|
|
<code>$f</code> contains what looks like an integer to start with, it has no
|
|
numeric type associated with it, and when you store <code>0.1</code> into <code>$f</code>, it
|
|
will happily overwrite the string `<code>0</code>'. It's a bit more inefficient to
|
|
use scalars, but actually not that much. You can't specify an output
|
|
base or precision, and in versions of zsh up to 4.0.x, there is a
|
|
problem when the parameter already has a string in it which doesn't make
|
|
sense as a numeric expression:</p>
|
|
<pre><code> % foo='/file/name'
|
|
% (( foo = 3 ))
|
|
zsh: bad math expression: operand expected at `/file/name'
|
|
</code></pre>
|
|
<p>The unexpected error comes because `<code>/file/name</code>' is evaluated even
|
|
though the shell is about to overwrite the contents of <code>$foo</code>. Versions
|
|
of the shell from 4.1.1 have a fix for this, and the integer assignment
|
|
works as expected.</p>
|
|
<p>You need to be careful with scalars that might contain an empty string.
|
|
If you declare `<code>integer i</code>', it will immediately contain the value 0,
|
|
but if you declare `<code>typeset s</code>', the scalar <code>$s</code> will just contain the
|
|
empty string. You get away with this if you use the parameter without a
|
|
`<code>$</code>' in front:</p>
|
|
<pre><code> % typeset s
|
|
% print $(( 3 * s ))
|
|
0
|
|
</code></pre>
|
|
<p>because the math code tries to retrieve <code>$s</code>, and when it fails puts a
|
|
<code>0</code> there. However, if you explicitly use <code>$s</code>, the math code gets
|
|
confused:</p>
|
|
<pre><code> % print $(( 3 * $s ))
|
|
zsh: bad math expression: operand expected at `'
|
|
</code></pre>
|
|
<p>because `<code>$s</code>' evaluates to an empty string before the arithmetic
|
|
evaluation proper, which spoils the syntax. There's one common case
|
|
where you need to do that, and that's with positional parameters:</p>
|
|
<pre><code> % fn() { print "Twice $1 is $(( 2 * $1 ))"; }
|
|
% fn 3
|
|
Twice 3 is 6
|
|
% fn
|
|
fn: bad math expression: operand expected at `'
|
|
</code></pre>
|
|
<p>Obviously turning the `<code>$1</code>' into `<code>1</code>' means something completely
|
|
different. You can guard against this with default values:</p>
|
|
<pre><code> % fn() { print "Twice ${1:=0} is $(( 2 * $1 ))"; }
|
|
% fn
|
|
Twice 0 is 0
|
|
</code></pre>
|
|
<p>This assigns a default value for <code>$1</code> if one was not set. Since
|
|
parameter expansion is performed in one go from left to right, the
|
|
second reference to <code>$1</code> will pick up that value.</p>
|
|
<p>Note that you need to do this even if it doesn't look like the number
|
|
will be needed:</p>
|
|
<pre><code> % fn() { print $(( ${1:-0} ? $1 : 3 )); }
|
|
% fn
|
|
fn: bad math expression: operand expected at `: 3 '
|
|
</code></pre>
|
|
<p>The expression before the `<code>?</code>' evaluates to zero if <code>$1</code> is not
|
|
present, and you expect the expression after the colon to be used in
|
|
that case. But actually it's too late by then; the arithmetic expression
|
|
parser has received `<code>0 ? : 3</code>', which doesn't make sense to it, hence
|
|
the error. So you need to put in `<code>${1:-0}</code>' for the second <code>$1</code>, too
|
|
--- or <code>${1:-32}</code>, or any other number, since it won't be evaluated if
|
|
<code>$1</code> is empty, it just needs to be parsed.</p>
|
|
<p>You should note that just as you can put numbers into scalar parameters
|
|
without needing any special handling, you can also do all the usual
|
|
string-related tricks on numeric parameters, since there is automatic
|
|
conversion in the other direction, too:</p>
|
|
<pre><code> % float foo
|
|
% zmodload -i zsh/mathfunc
|
|
% (( foo = 4 * atan(1.0) ))
|
|
% print $foo
|
|
3.141592654e+00
|
|
% print ${foo%%.*}${foo##*.[0-9]##}
|
|
3e+00
|
|
</code></pre>
|
|
<p>The argument <code>-i</code> to <code>zmodload</code> tells it not to complain if the math
|
|
library is already loaded. This gives us access to <code>atan</code>. Remember,
|
|
`<code>float</code>' declares a parameter whose output includes an exponent ---
|
|
you can actually convert it to a fixed point format on the fly using
|
|
`<code>typeset -F foo</code>', which retains the value but alters the output type.
|
|
The substitution uses some <code>EXTENDED_GLOB</code> chicanery: the final
|
|
`<code>[0-9]##</code>' matches one or more occurrences of any decimal digit. So
|
|
the head of the string value of <code>$foo</code> up to the last digit after the
|
|
decimal point is removed, and the remainder appended to whatever appears
|
|
before the decimal point.</p>
|
|
<p>Starting from 4.1.1, a calculator function called <code>zcalc</code> is bundled
|
|
with the shell. You type a standard arithmetic expression and the shell
|
|
evaluates the formula and prints it out. Lines already entered are
|
|
prefixed by a number, and you can use the positional parameter
|
|
corresponding to that number to retrieve that result for use in a new
|
|
formula. The function uses <code>vared</code> to read the formulae, so the full
|
|
shell editing mechanism is available. It will also read in
|
|
<code>zsh/mathfunc</code> if that is present.</p>
|
|
<p><span id="l133"></span></p>
|
|
<h2 id="57-brace-expansion-and-arrays"><a class="header" href="#57-brace-expansion-and-arrays">5.7: Brace Expansion and Arrays</a></h2>
|
|
<p>Brace expansion, which you met in <a href="zshguide03.html#syntax">chapter 3</a>,
|
|
appears in all csh derivatives, in some versions of ksh, and in bash, so
|
|
is fairly standard. However, there are some features and aspects of it
|
|
which are only found in zsh, which I'll describe here.</p>
|
|
<p>A complication occurs when arrays are involved. Normally, unquoted
|
|
arrays are put into a command line as if there is a break between
|
|
arguments when there is a new element, so</p>
|
|
<pre><code> % array=(three separate words)
|
|
% print -l before${array}after
|
|
beforethree
|
|
separate
|
|
wordsafter
|
|
</code></pre>
|
|
<p>unless the <code>RC_EXPAND_PARAM</code> option is set, which combines the before
|
|
and after parts with <em>each</em> element, so you get:</p>
|
|
<pre><code> % print -l before${^array}after
|
|
beforethreeafter
|
|
beforeseparateafter
|
|
beforewordsafter
|
|
</code></pre>
|
|
<p>--- the `<code>^</code>' character turns on the option just for that expansion,
|
|
as `<code>=</code>' does with <code>SH_WORD_SPLIT</code>. If you think of the character as a
|
|
correction to a proof, meaning `insert a new word between the others
|
|
here', it might help you remember (this was suggested by Bart Schaefer).</p>
|
|
<p>These two ways of expanding arrays interact differently with braces; the
|
|
more useful version here is when the <code>RC_EXPAND_PARAM</code> option is on.
|
|
Here the array acts as sort of additional nesting:</p>
|
|
<pre><code> % array=(two three)
|
|
% print X{one,${^array}}Y
|
|
XoneY XtwoY XoneY XthreeY
|
|
</code></pre>
|
|
<p>with the <code>XoneY</code> tacked on each time, but because of the braces it
|
|
appears as a separate word, so there are four altogether.</p>
|
|
<p>If <code>RC_EXPAND_PARAM</code> is not set, you get something at first sight
|
|
slightly odd:</p>
|
|
<pre><code> % array=(two three)
|
|
% print X{one,$array}Y
|
|
X{one,two three}Y
|
|
</code></pre>
|
|
<p>What has happened here is that the <code>$array</code> has produced two words; the
|
|
first has `<code>X{one,</code>' tacked in front of the array's `<code>two</code>', while the
|
|
second likewise has `<code>}Y</code>' on the end of the array's `<code>three</code>'. So by
|
|
the time the shell comes to think about brace expansion, the braces are
|
|
in different words and don't do anything useful.</p>
|
|
<p>There's no obvious simple way of forcing the <code>$array</code> to be embedded in
|
|
the braces at the same level, instead of like an additional set of
|
|
braces. There are more complicated ways, of course.</p>
|
|
<pre><code> % array=(two three)
|
|
% print X${^=:-one $array}Y
|
|
XoneY XtwoY XthreeY
|
|
</code></pre>
|
|
<p>Yuk. We gave parameter substitution a string of words, the array with
|
|
<code>one</code> stuck in front, and told it to split them on spaces (this will
|
|
split on any extra spaces in elements of <code>$array</code>, unfortunately), while
|
|
setting <code>RC_EXPAND_PARAM</code>. The parameter flags are `<code>^=</code>'; the `<code>:-</code>'
|
|
is the usual `insert the following if the substitution has zero length'
|
|
operator. It's probably better just to create your own temporary array
|
|
and apply <code>RC_EXPAND_PARAM</code> to that. By the way, if you had
|
|
<code>RC_EXPAND_PARAM</code> set already, the last result would have been different
|
|
because the embedded <code>$array</code> would have been expanded together with the
|
|
`<code>one </code>' in front of it.</p>
|
|
<p>Braces allow numeric expressions; this works a little like in Perl:</p>
|
|
<pre><code> % print {1..10}a
|
|
1a 2a 3a 4a 5a 6a 7a 8a 9a 10a
|
|
</code></pre>
|
|
<p>and you can ask the numbers to be padded with zeroes:</p>
|
|
<pre><code> % print {01..10}b
|
|
01b 02b 03b 04b 05b 06b 07b 08b 09b 10b
|
|
</code></pre>
|
|
<p>or have them in descending order:</p>
|
|
<pre><code> % print {10..1}c
|
|
10c 9c 8c 7c 6c 5c 4c 3c 2c 1c
|
|
</code></pre>
|
|
<p>Nesting this within other braces works in the expected way, but you
|
|
can't have any extra braces inside: the syntax is fixed to number, two
|
|
dots, number, and the numbers must be positive.</p>
|
|
<p>There's also an option <code>BRACE_CCL</code> which, if the braces aren't in either
|
|
of the above forms, expands single letters and ranges of letters:</p>
|
|
<pre><code> % setopt braceccl
|
|
% print 1{abw-z}2
|
|
1a2 1b2 1w2 1x2 1y2 1z2
|
|
</code></pre>
|
|
<p>An important point to be made about braces is that they are <em>not</em> part
|
|
of filename generation; they have nothing to do with pattern matching at
|
|
all. The shell blindly generates all the arguments you specify. If you
|
|
want to generate only some arguments, depending on what files are
|
|
matched, you should use the alternative-match syntax. Compare:</p>
|
|
<pre><code> % ls
|
|
file1
|
|
% print file(1|2)
|
|
file1
|
|
% print file{1,2}
|
|
file1 file2
|
|
</code></pre>
|
|
<p>The first matches any of `<code>file1</code>' or `<code>file2</code>' it happens to find in
|
|
the directory (regardless of other files). The second doesn't look at
|
|
files in the directory at all; it simply expands the braces according to
|
|
the rules given above.</p>
|
|
<p>This point is particularly worthy of note if you have come from a
|
|
C-shell world, or use the <code>CSH_NULL_GLOB</code> option:</p>
|
|
<pre><code> csh% echo file{1,2}
|
|
file1 file2
|
|
csh% echo f*{1,2}
|
|
file1
|
|
</code></pre>
|
|
<p>(`<code>csh%</code>' is the prompt, to remind you if you're skipping through
|
|
without reading the text), where the difference occurs because in the
|
|
first case there was no pattern, so brace expansion was done on ordinary
|
|
words, while in the second case the `<code>*</code>' made pattern expansion
|
|
happen. In zsh, the sequence would be: `<code>f*{1,2}</code>' becomes `<code>f*1 f*2</code>'; the first becomes <code>file1</code> and the second fails to match. With
|
|
<code>CSH_NULL_GLOB</code> set, the failed match is simply removed; there is no
|
|
error because one pattern has succeeded in matching. This is presumably
|
|
the logic usually followed by the C shell. If you stick with
|
|
`<code>file(1|2)</code>' and `<code>f*(1|2)</code>' --- in this case you can simplify them
|
|
to `<code>file[12]</code>' and `<code>f*[12]</code>', but that's not true if you have more
|
|
than one character in either branch --- you are protected from this
|
|
difference.</p>
|
|
<p><span id="l134"></span></p>
|
|
<h2 id="58-filename-expansion"><a class="header" href="#58-filename-expansion">5.8: Filename Expansion</a></h2>
|
|
<p>Filename expansion consists of just `<code>~/...</code>', `<code>~user/...</code>',
|
|
`<code>~namedir/...</code>' and `<code>=prog</code>', where the `<code>~</code>' and `<code>=</code>' must be
|
|
the first character of a word, and the option <code>EQUALS</code> must be set (it
|
|
is by default) for the `<code>=</code>' to be special. I told you about all this
|
|
in <a href="zshguide03.html#syntax">chapter 3</a>.</p>
|
|
<p>There's really only one thing to add, and that's the behaviour of the
|
|
<code>MAGIC_EQUAL_SUBST</code> option. Assignments after <code>typeset</code> and similar
|
|
statements are handled as follows</p>
|
|
<pre><code> % typeset foo=~pws
|
|
% print $foo
|
|
/home/pws
|
|
% typeset PATH=$PATH:~pws/bin
|
|
% print ${path[-1]}
|
|
/home/pws/bin
|
|
</code></pre>
|
|
<p>It may not be obvious why this is not obvious. The point is that
|
|
`<code>typeset</code>' is an ordinary command which happens to be a shell builtin;
|
|
the arguments of ordinary commands are not assignments. However, a
|
|
special case is made here for <code>typeset</code> and its friends so that this
|
|
works, even though, as I've said repeatedly, array assignments can't be
|
|
done after <code>typeset</code>. The parameter <code>$PATH</code> isn't handled differently
|
|
from any other --- any colon in an assignment to any variable is special
|
|
in the way shown.</p>
|
|
<p>It's often useful to have this feature with commands of your own. There
|
|
is an option, <code>MAGIC_EQUAL_SUBST</code>, which spots the forms `<code>...=~...</code>'
|
|
and `<code>...=...:~...</code>' for any command at all and expands
|
|
<code>~</code>-expressions. Commands where this is particularly useful include
|
|
<code>make</code> and the GNU <code>configure</code> command used for setting up the
|
|
compilation of a software package from scratch.</p>
|
|
<p>A related new option appeared in version 4.0.2 when it became clear
|
|
there was an annoying difference between zsh and other shells such as
|
|
ksh and bash. Consider:</p>
|
|
<pre><code> export FOO=`echo hello there`
|
|
</code></pre>
|
|
<p>In ksh and bash, this exports <code>$FOO</code> with the value `<code>hello there</code>'. In
|
|
zsh, however, an unquoted backquote expression forces wordsplitting, so
|
|
the line becomes</p>
|
|
<pre><code> export FOO=hello there
|
|
</code></pre>
|
|
<p>and exports <code>$FOO</code> with the value `<code>hello</code>', and <code>$there</code> with any
|
|
value it happens to have already or none if it didn't exist. This is
|
|
actually perfectly logical according to the rules, but you can set the
|
|
option <code>KSH_TYPESET</code> to have the other interpretation.</p>
|
|
<p>Normally, <code>KSH_TYPESET</code> applies only after parameter declaration
|
|
builtins, and then only in the values of an assignment. However, in
|
|
combination with <code>MAGIC_EQUAL_SUBST</code>, you will get the same behaviour
|
|
with any command argument that looks like an assignment --- actually,
|
|
anything following an `<code>=</code>' which wasn't at the start of the word, so
|
|
`<code>"hello mother, => I'm home "$(echo right now)</code>' qualifies.</p>
|
|
<p>It seems that bash behaves as if both <code>KSH_TYPESET</code> <em>and</em>
|
|
<code>MAGIC_EQUAL_SUBST</code> are always in effect.</p>
|
|
<p><span id="l135"></span></p>
|
|
<h2 id="59-filename-generation-and-pattern-matching"><a class="header" href="#59-filename-generation-and-pattern-matching">5.9: Filename Generation and Pattern Matching</a></h2>
|
|
<p>The final topic is perhaps the biggest, even richer than parameter
|
|
expansion. I'm finally going to explain the wonderful world of zsh
|
|
pattern matching. In addition to patterns as such, you will learn such
|
|
things as how to find all files in all subdirectories, searching
|
|
recursively, which have a given name, case insensitive, are at least 50
|
|
KB large, no more than a week old and owned by the root user, and
|
|
allowing up to a single error in the spelling of the name. In fact, the
|
|
required expression looks like this:</p>
|
|
<pre><code> **/(#ia1)name(LK+50mw-1u0)
|
|
</code></pre>
|
|
<p>which might appear, at first sight, a mite impenetrable. We'll work up
|
|
to it gradually.</p>
|
|
<p>To repeat: filename generation is just the same as globbing, only
|
|
longer. I use the terms interchangeably.</p>
|
|
<p><span id="l136"></span></p>
|
|
<h3 id="591-comparing-patterns-and-regular-expressions"><a class="header" href="#591-comparing-patterns-and-regular-expressions">5.9.1: Comparing patterns and regular expressions</a></h3>
|
|
<p>It can be confusing that there are two rather different sorts of pattern
|
|
around, those used for matching files on a command line as in zsh and
|
|
other shells, and those used for matching text inside files as in
|
|
<code>grep</code>, <code>sed</code>, <code>emacs</code>, <code>perl</code> and many other utilities, each of which,
|
|
typically, has a slightly different form for patterns (called in this
|
|
case `regular expressions', because UNIX was designed by computer
|
|
scientists). There are even some utilities like TCL which provide both
|
|
forms.</p>
|
|
<p>Zsh deals exclusively with the shell form, which I've been calling by
|
|
its colloquial name, `globbing', and consequently I won't talk about
|
|
regular expressions in any detail. Here are the two classic differences
|
|
to note. First, in a shell, `<code>*</code>' on its own matches any set of
|
|
characters, while in a regular expression it always refers to the
|
|
previous pattern, and says that that can be repeated any number of
|
|
times. Second, in a shell `<code>.</code>' is an ordinary (and much used)
|
|
character, while in a regular expression it means `any character',
|
|
which is specified by `<code>?</code>' in the shell. Put this together, and what a
|
|
shell calls `<code>*</code>' is given by `<code>.*</code>' in a regular expression. `<code>*</code>'
|
|
in the latter case is called a `Kleene closure': it's those computer
|
|
scientists again. In zsh, art rather than science tends to be in
|
|
evidence.</p>
|
|
<p>In fact, zsh does have many of the features available in regular
|
|
expressions, as well as some which aren't. Remember that anywhere in zsh
|
|
where you need a pattern, it's of the same form, whether it's matching
|
|
files on the command line or a string in a <code>case</code> statement. There are a
|
|
few features which only fit well into one or another use of patterns;
|
|
for example the feature that selects files by examining their type,
|
|
owner, age, etc. (the final parenthesis in the expression I showed
|
|
above) is of no use in matching against a string.</p>
|
|
<p><span id="l137"></span></p>
|
|
<h3 id="592-standard-features"><a class="header" href="#592-standard-features">5.9.2: Standard features</a></h3>
|
|
<p>There is one thing to note about the simple pattern matching features
|
|
`<code>*</code>' and `<code>?</code>', which is that when matching file names (not in other
|
|
places patterns are used, however) they never match a leading `<code>.</code>'.
|
|
This is a convention in UNIX-like systems to hide certain files which
|
|
are not interesting to most users. You may have got the impression that
|
|
files beginning with `<code>.</code>' are somehow special, but that's not so; only
|
|
the files `<code>.</code>' (the current directory) and `<code>..</code>' (the parent
|
|
directory, or the current directory in <code>/</code>) are special to the system.
|
|
Other files beginning with `<code>.</code>' only appear special because of a
|
|
conspiracy between the shell (the rule I've just given) and the command
|
|
<code>ls</code>, which, when it lists a directory, doesn't show files beginning
|
|
`<code>.</code>' unless you give the `<code>-a</code>' option. Otherwise `<code>.</code>'-files are
|
|
perfectly normal files.</p>
|
|
<p>You can suppress the special rule for an initial `<code>.</code>' by setting the
|
|
option <code>GLOB_DOTS</code>, in which case `<code>*</code>' will match every single file
|
|
and directory except for `<code>.</code>' and `<code>..</code>'.</p>
|
|
<p>In addition to `<code>*</code>' and `<code>?</code>', which are so basic that even DOS had
|
|
them (though I never <em>quite</em> worked out exactly what it was doing with
|
|
them a lot of the time), the pattern consisting of a set of characters
|
|
in square brackets appears in all shells. This feature happens to be
|
|
pretty much the same as in regular expressions. `<code>[abc]</code>' matches any
|
|
one of those three characters; `<code>[a-z]</code>' matches any character between
|
|
<code>a</code> and <code>z</code>, inclusive; `<code>[^a-z]</code>' matches any single character
|
|
<em>except</em> those 26 --- but notice it still matches a single character.</p>
|
|
<p>A recent common enhancement to character ranges features in zsh, which
|
|
is to specify types of characters instead of listing them; I'm just
|
|
repeating the manual entry here, which you should consult for more
|
|
detail. The special syntax is like `<code>[:</code><em>spec</em><code>:]</code>', where the square
|
|
brackets there are in addition to the ones specifying the range. If you
|
|
are familiar with the `ctype' macros used in C programmes, you will
|
|
probably recognise the things that <em>spec</em> can be: <code>alnum</code>, <code>alpha</code>,
|
|
<code>blank</code>, <code>cntrl</code>, <code>digit</code>, <code>graph</code>, <code>lower</code>, <code>print</code>, <code>punct</code>, <code>space</code>,
|
|
<code>upper</code>, <code>xdigit</code>. The similarity to C macros isn't just for show: the
|
|
shell really does call the macro (or function) `<code>isalpha</code>' to test for
|
|
<code>[:alpha:]</code>ness, and so on. On most modern systems which support
|
|
internationalization this means the shell can tell you whether a
|
|
character is, say, an alphabetic letter in the character set in use on
|
|
your machine. By the way, zsh doesn't use international character set
|
|
support for sorting matches --- this turned out to produce too many
|
|
unexpected effects.</p>
|
|
<p>So `<code>[^[:digit:]]</code>' matches any single character other than a decimal
|
|
digit. Standards say you should use `<code>!</code>' instead of `<code>^</code>' to signify
|
|
negation, but most people I know don't; also, this can clash with
|
|
history substitution. However, it is accepted by zsh anywhere where
|
|
history substitution doesn't get its hands on the `<code>!</code>' first (which
|
|
includes all scripts and autoloaded functions).</p>
|
|
<p><span id="l138"></span></p>
|
|
<h3 id="593-extensions-usually-available"><a class="header" href="#593-extensions-usually-available">5.9.3: Extensions usually available</a></h3>
|
|
<p>Now we reach the bits specific to zsh. I've divided these into two
|
|
parts, since some require the option `<code>EXTENDED_GLOB</code>' to be set ---
|
|
those which are most likely to clash with other uses of the characters
|
|
in question.</p>
|
|
<p><strong>Numeric ranges</strong></p>
|
|
<p>One possibility that is always available is the syntax for numeric
|
|
ranges in the form `<code>&lt;</code><em>num1</em><code>-</code><em>num2</em><code>&gt;</code>'. You can omit either <em>num1</em>,
|
|
which defaults to zero, or <em>num2</em>, which defaults to infinity, or both,
|
|
in which case any set of digits will be matched. Note that this really
|
|
<em>does</em> mean infinity, despite the finite range of integers; missing out
|
|
<em>num2</em> is treated as a special case and the shell will simply advance
|
|
over any number of digits. (In <em>very</em> old versions of zsh you had to use
|
|
`<code>&lt;&gt;</code>' to get that effect, but that has been removed and `<code>&lt;&gt;</code>' is now
|
|
a redirection operator, as in other shells; `<code>&lt;-&gt;</code>' is what you need
|
|
for any set of digits.)</p>
|
|
<p>I repeat another warning from the manual: this test</p>
|
|
<pre><code> [[ 342 = <1-30>* ]]
|
|
</code></pre>
|
|
<p>succeeds, even though the number isn't in the range 1 to 30. That's
|
|
because `<code>&lt;1-30&gt;</code>' matches `<code>3</code>' and `<code>*</code>' matches 42. There's no use
|
|
moaning, it's a consequence of the usual rule for patterns of all types
|
|
in shells or utilities: pattern operators are tried independently, and
|
|
each `uses up' the longest piece of the string it is matching without
|
|
causing the rest of the match to fail. We would have to break this
|
|
simple and well-tried rule to stop numeric ranges matching if there is
|
|
another digit left. You can test for that yourself, of course:</p>
|
|
<pre><code> [[ 342 = <1-30>(|[^[:digit:]]*) ]]
|
|
</code></pre>
|
|
<p>fails. I wrote it so that it would match any number between 1 and 30,
|
|
either not followed by anything, or followed by something which doesn't
|
|
start with a digit; I will explain what the parentheses and the vertical
|
|
bar are doing in the next section. By the way, leading zeroes are
|
|
correctly handled (and never force octal interpretation); so
|
|
`<code>00000003NaN</code>' would successfully match the pattern.</p>
|
|
<p>The numbers in the range are always positive integers; you need extra
|
|
pattern trickery to match floating point. Here's one attempt, which uses
|
|
<code>EXTENDED_GLOB</code> operators, so come back and look when you've read the
|
|
rest of this section if it doesn't make sense now:</p>
|
|
<pre><code> isfloat() {
|
|
setopt localoptions extendedglob
|
|
if [[ $1 = ([-+]|)([0-9]##.[0-9]#|[0-9]#.[0-9]##)\
|
|
([eE]([-+]|)[0-9]##|) ]]; then
|
|
print -r -- "$1 is a floating point number"
|
|
else
|
|
print -r -- "$1 is not a floating point number"
|
|
fi
|
|
}
|
|
</code></pre>
|
|
<p>I've split it over two lines to fit. The first parenthesis matches an
|
|
optional minus or plus sign --- careful with `<code>-</code>' in square brackets,
|
|
since if it occurs in the middle it's taken as a range, and if you want
|
|
it to match itself, it has to be at the start or end. The second
|
|
parenthesis contains an alternative because `<code>.</code>' isn't a floating
|
|
point number (at least, not in my book, and not in zsh's, either), but
|
|
both `<code>0.</code>' and `<code>.0</code>' <em>are</em> properly formed numbers. So we need at
|
|
least one digit, either before or after the decimal point; the `<code>##</code>'
|
|
means `at least one occurrence of the previous expression', while the
|
|
`<code>#</code>' means `zero or more occurrences of the previous expression'. The
|
|
expression on the next line matches an exponent; here you need at least
|
|
one digit, too. So `<code>3.14159E+00</code>' is successfully matched, and indeed
|
|
you'll find that zsh's arithmetic operations handle it properly.</p>
|
|
<p>The range operator is the only special zsh operator that you can't turn
|
|
off with an option. This is usually not a problem, but in principle a
|
|
string like `<code>&lt;3-10&gt;</code>' is ambiguous, since in another shell it would be
|
|
read as `<code>&lt;3-10 &gt;</code>', meaning `take input from file <code>3-10</code>, and send
|
|
output to the file formed by whatever comes after the expression'. It's
|
|
very unlikely you will run across this in practice, however, since shell
|
|
code writers nearly always put a space after the end of a file name for
|
|
redirection if something else follows on the command line, and that's
|
|
enough to differentiate it from a range operator.</p>
|
|
<p><strong>Parentheses</strong></p>
|
|
<p>Parentheses are quite natural in zsh if you've used extended regular
|
|
expressions. They are usually available, and only turned off if you set
|
|
the `<code>SH_GLOB</code>' option to ensure compatibility with shells that don't
|
|
have it. The key part of the expression is the vertical bar, which
|
|
specifies an alternative. It can occur as many times as necessary;
|
|
`<code>(a|b|c|d|e|f|g|h|i|j|k|l|m)</code>' is a rather idiosyncratic way of
|
|
writing `<code>[a-m]</code>'. If you don't include the vertical bar (we'll see
|
|
reasons for not doing so later), and you are generating filenames, you
|
|
should be careful that the expression doesn't occur at the end of the
|
|
pattern, else it would be taken as a `glob qualifier', as described
|
|
below. The rather unsightly hack of putting `<code>(|)</code>' (match the empty
|
|
string or the empty string --- guess what this matches?) right at the
|
|
end will get around that problem.</p>
|
|
<p>The vertical bar usually needs to be inside parentheses so that the
|
|
shell doesn't take it as a pipe, but in some contexts where this won't
|
|
happen, such as a case statement label, you can omit any parentheses
|
|
that would completely surround the pattern. So in</p>
|
|
<pre><code> case $foo in
|
|
(bar|rod|pipe) print "foo represents a piece of metal"
|
|
;;
|
|
(*) print "Are you trying to be different?"
|
|
;;
|
|
esac
|
|
</code></pre>
|
|
<p>the surrounding parentheses are the required syntax for <code>case</code>, rather
|
|
than pattern parentheses --- the same syntax works in other shells. Then
|
|
`<code>bar|rod</code>' is an ordinary zsh expression matching either <code>bar</code> or
|
|
<code>rod</code>, in a context where the `<code>|</code>' can't be mistaken for a pipe. In
|
|
fact, this whole example works with <code>ksh</code> --- but there the use of
|
|
`<code>|</code>' is a special case, while in zsh it fits in with the standard
|
|
pattern rules.</p>
|
|
<p>Indeed, ksh has slightly different ways of specifying patterns: to make
|
|
the use of parentheses less ambiguous, it requires a character before
|
|
the left parenthesis. The corresponding form for a simple alternative is
|
|
`<code>@(this|that)</code>'. The `<code>@</code>' can also be a `<code>?</code>', for zero or one
|
|
occurrences of what's in the parentheses; `<code>*</code>' for any number of
|
|
repetitions, for example `<code>thisthisthatthis</code>'; or `<code>!</code>' for anything
|
|
except what's in the parentheses. Zsh allows this syntax if you set the
|
|
option <code>KSH_GLOB</code>. Note that this is independent of the option
|
|
<code>SH_GLOB</code>; if you set <code>KSH_GLOB</code> but not <code>SH_GLOB</code>, you can actually use
|
|
both forms for pattern matching, with the ksh form taking precedence in
|
|
the case of ambiguities. This is probably to be avoided. In ksh
|
|
emulation, both options are set; this is the only sensible reason I know
|
|
of for using these options at all. I'll show some comparisons in the
|
|
next section.</p>
|
|
<p>An important thing to note is that when you are matching files, you
|
|
can't put directory separators inside parentheses:</p>
|
|
<pre><code> # Doesn't work!
|
|
print (foo/bar|bar/foo)/file.c
|
|
</code></pre>
|
|
<p>doesn't work. The reason is that it's simply too difficult to write;
|
|
pattern matching would be bound in a highly intricate way with searching
|
|
the directory hierarchy, with the end of a group sending you back up to
|
|
try another bit of the pattern on a directory you'd already visited.
|
|
It's probably not impossible, but the pattern code maintainer (me) isn't
|
|
all that enthusiastic about it.</p>
|
|
<p><span id="l139"></span></p>
|
|
<h3 id="594-extensions-requiring-extended_glob"><a class="header" href="#594-extensions-requiring-extended_glob">5.9.4: Extensions requiring <code>EXTENDED_GLOB</code></a></h3>
|
|
<p>Setting <code>EXTENDED_GLOB</code> makes three new types of operator available:
|
|
those which exclude a particular pattern from matching; those which
|
|
specify that a pattern may occur a repeated number of times; and a set
|
|
of `globbing flags', a little bit like parameter flags which I'll
|
|
describe in a later section since they are really the icing on the cake.</p>
|
|
<p><strong>Negative matches or exclusions</strong></p>
|
|
<p>The simpler of the two exclusions uses `<code>^</code>' to introduce a pattern
|
|
which must <em>not</em> be matched. So a trivial example (I will assume for
|
|
much of the rest of the chapter that the option <code>EXTENDED_GLOB</code> is set)
|
|
is:</p>
|
|
<pre><code> [[ foo = ^foo ]]
|
|
[[ bar = ^foo ]]
|
|
</code></pre>
|
|
<p>The first test fails, the second succeeds. It's important to realise
|
|
that the pattern demands nothing else whatever about the relevant
|
|
part of the test string other than it doesn't match the pattern that
|
|
follows: it doesn't say what length the matched string should have, for
|
|
example. So</p>
|
|
<pre><code> [[ foo = *^foo ]]
|
|
</code></pre>
|
|
<p>actually <em>does</em> match: <code>*</code> swallows up the whole string, and the
|
|
remaining empty string successfully fails to be `<code>foo</code>'. Remember the
|
|
mantra: each part of the pattern matches the longest possible substring
|
|
that causes the remainder of the pattern not to fail (unless, of course,
|
|
failure is unavoidable).</p>
|
|
<p>Note that the <code>^</code> applies to the whole pattern to its right, either to
|
|
the end of the string, or to the end of the nearest enclosing
|
|
parenthesis. Here's a couple more examples:</p>
|
|
<pre><code> [[ foo = ^foo* ]]
|
|
</code></pre>
|
|
<p>Overall, this fails to match: the pattern `<code>foo*</code>' always matches the
|
|
string on the left, so negating means it always fails.</p>
|
|
<pre><code> [[ foo = (^foo)* ]]
|
|
</code></pre>
|
|
<p>This is similar to the last example but one. The expression in the
|
|
parenthesis first matches against <code>foo</code>; this causes the overall match
|
|
to fail because of the <code>^</code>, so it backs up one character and tries
|
|
again. Now `<code>fo</code>' is successfully matched by <code>^foo</code> and the remaining
|
|
`<code>o</code>' is matched by the <code>*</code>, so the overall match succeeds. When you
|
|
know about backreferences, you will be able to confirm that, indeed, the
|
|
expression in parentheses matches `<code>fo</code>'. This is a quite subtle point:
|
|
it's easy to imagine that `<code>^foo</code>' says `match any three letter string
|
|
except the one I've given you', but actually there is no requirement
|
|
that it match three letters, or indeed any.</p>
|
|
<p>In filename generation, the <code>^</code> has a higher precedence than a slash:</p>
|
|
<pre><code> % print /*/tmp
|
|
/data/tmp /home/tmp /usr/tmp /var/tmp
|
|
% print /^usr/tmp
|
|
/data/tmp /home/tmp /var/tmp
|
|
</code></pre>
|
|
<p>successfully caused the first level of directories to match anything but
|
|
`<code>usr</code>'. A typical use of this with files is `<code>^*.o</code>' to match
|
|
everything in a directory except files which end with `<code>.o</code>'.</p>
|
|
<p>Note one point mentioned in the FAQ --- probably indicating the reason
|
|
that `<code>^</code>' is only available with <code>EXTENDED_GLOB</code> switched on. Some
|
|
commands use an initial `<code>^</code>' to indicate a control character; in fact,
|
|
zsh's <code>bindkey</code> builtin does this:</p>
|
|
<pre><code> bindkey '^z' backward-delete-word
|
|
</code></pre>
|
|
<p>which attaches the given function to the keystroke <code>Ctrl-z</code>. You must
|
|
remember to quote that keystroke expression, otherwise it would expand
|
|
to a list of all files in the current directory not called `<code>z</code>', very
|
|
likely all of them.</p>
|
|
<p>There's another reason this isn't available by default: in some versions
|
|
of the Bourne shell, `<code>^</code>' was used for pipes since `<code>|</code>' was missing
|
|
on some keyboards.</p>
|
|
<p>The other exclusion operator is closely related. `<em>pat1</em><code>~</code><em>pat2</em>'
|
|
means `anything that matches <em>pat1</em> as long as it doesn't also match
|
|
<em>pat2</em>'. If <em>pat1</em> is <code>*</code>, you have the same effect as `<code>^</code>' --- in
|
|
fact, that's pretty much how `<code>^</code>' is currently implemented.</p>
|
|
<p>There's one significant difference between `<code>*~</code><em>pat</em>' and `<code>^</code><em>pat</em>':
|
|
the <code>~</code> has a <em>lower</em> precedence than `<code>/</code>' when matching against
|
|
filenames. What's more, the pattern on the right of the <code>~</code> is not
|
|
treated as a filename at all; it's simply matched against any filename
|
|
found on the left, to see if it should be rejected. This sounds like
|
|
black magic, but it's actually quite useful, particularly in combination
|
|
with the recursive globbing syntax:</p>
|
|
<pre><code> print **/*~*/CVS(/)
|
|
</code></pre>
|
|
<p>matches any subdirectory of the current directory to any depth, except
|
|
for directories called <code>CVS</code> --- the `<code>*</code>' on the right of the `<code>~</code>'
|
|
will match any character including `<code>/</code>'. The final `<code>(/)</code>' is a glob
|
|
qualifier indicating that only directories are to be allowed to match
|
|
--- note that it's a positive assertion, despite being after the `<code>~</code>'.
|
|
Glob qualifiers do not feel the effect of preceding exclusion operators.</p>
|
|
<p>Note that in that example, any subdirectory of a directory called <code>CVS</code>
|
|
would have matched successfully; you can see from the pattern that the
|
|
expression after the `<code>~</code>' wouldn't weed it out. Slightly less
|
|
obviously, the `<code>**/*</code>' matches files in the current directory, while
|
|
the `<code>*/CVS</code>' never matches a `<code>CVS</code>' in the current directory, so
|
|
that could appear. If you want to, you can fix that up like this:</p>
|
|
<pre><code> print **/*~(*/|)CVS(/*|)(/)
|
|
</code></pre>
|
|
<p>again relying on the fact that `<code>/</code>'s are not special after the `<code>~</code>'.
|
|
This will ruthlessly weed out any path with a directory component called
|
|
<code>CVS</code>. An easier, but less instructive, way is</p>
|
|
<pre><code> print ./**/*~*/CVS(/)
|
|
</code></pre>
|
|
<p>You can restrict the range of the tilde operator by putting it in
|
|
parentheses, so `<code>/(*~usr)/tmp</code>' is equivalent to `<code>/^usr/tmp</code>'.</p>
|
|
<p>A `<code>~</code>' at the beginning is never treated as excluding what follows; as
|
|
you already know, it has other uses. Also, a `<code>~</code>' at the end of a
|
|
pattern isn't special either; this is lucky, because Emacs produces
|
|
backup files by adding a `<code>~</code>' to the end of the file name. You may
|
|
have problems if you use Emacs's facility for numbered backup files,
|
|
however, since then there is a `<code>~</code>' in the middle of the file name,
|
|
which will need to be quoted when used in the shell.</p>
|
|
<p><strong>Closures or repeated matches</strong></p>
|
|
<p>The extended globbing symbols `<code>#</code>' and `<code>##</code>', when they occur in a
|
|
pattern, are equivalent to `<code>*</code>' and `<code>+</code>' in extended regular
|
|
expressions: `<code>#</code>' allows the previous pattern to match any number of
|
|
times, including zero, while with `<code>##</code>' it must match at least once.
|
|
Note that this pattern does not extend beyond two hashes --- there is no
|
|
special symbol `<code>###</code>', which is not recognised as a pattern at all.</p>
|
|
<p>The `previous pattern' is the smallest possible item which could be
|
|
considered a complete pattern. Very often it is something in
|
|
parentheses, but it could be a group in square or angle brackets, or a
|
|
single ordinary character. Note particularly that in</p>
|
|
<pre><code> # fails
|
|
[[ foofoo = foo# ]]
|
|
</code></pre>
|
|
<p>the test fails, because the `<code>#</code>' only refers to the final `<code>o</code>', not
|
|
the entire string. What you need is</p>
|
|
<pre><code> # succeeds
|
|
[[ foofoo = (foo)# ]]
|
|
</code></pre>
|
|
<p>It might worry you that `<code>#</code>' also introduces comments. Since a
|
|
well-formatted pattern never has `<code>#</code>' at the start, however, this
|
|
isn't a problem unless you expect comments to start in the middle of a
|
|
word. It turns out that doesn't even happen in other shells --- `<code>#</code>'
|
|
must be at the start of a line, or be unquoted and have space in front
|
|
of it, to be recognised as introducing a comment. So in fact there is no
|
|
clash at all here. There is, of course, a clash if you expect
|
|
`<code>.#foo.c.1.131</code>' (probably a file produced by the version control
|
|
system CVS while attempting to resolve a conflict) to be a plain string,
|
|
hence the dependence on the <code>EXTENDED_GLOB</code> option.</p>
|
|
<p>That's probably all you need to know; the `<code>#</code>' operators are generally
|
|
much easier to understand than the exclusion operators. Just in case you
|
|
are confused, I might as well point out that repeating a <em>pattern</em> is
|
|
not the same as repeating a <em>string</em>, so</p>
|
|
<pre><code> [[ onetwothreetwoone = (one|two|three)## ]]
|
|
</code></pre>
|
|
<p>successfully matches; the string is different for each repetition of the
|
|
pattern, but that doesn't matter.</p>
|
|
<p>We now have enough information to construct a list of correspondences
|
|
between zsh's normal pattern operators and the ksh ones, available with
|
|
<code>KSH_GLOB</code>. Be careful with `<code>!</code>(<em>...</em>)'; it seems to have a slightly
|
|
different behaviour to the zsh near-equivalent. The following table is
|
|
lifted directly from the zsh FAQ.</p>
|
|
<pre><code>----------------------------------------------------------------------
|
|
ksh zsh Meaning
|
|
------ ------ ---------
|
|
!(foo) ^foo Anything but foo.
|
|
or foo1~foo2 Anything matching foo1 but foo2.
|
|
@(foo1|foo2|...) (foo1|foo2|...) One of foo1 or foo2 or ...
|
|
?(foo) (foo|) Zero or one occurrences of foo.
|
|
*(foo) (foo)# Zero or more occurrences of foo.
|
|
+(foo) (foo)## One or more occurrences of foo.
|
|
----------------------------------------------------------------------
|
|
</code></pre>
|
|
<p>In both languages, the vertical bar for alternatives can appear inside
|
|
any set of parentheses. Beware of the precedences of <code>^foo</code> and
|
|
`<code>foo1~foo2</code>'; surround them with parentheses, too, if necessary.</p>
|
|
<p><span id="l140"></span></p>
|
|
<h3 id="595-recursive-globbing"><a class="header" href="#595-recursive-globbing">5.9.5: Recursive globbing</a></h3>
|
|
<p>One of the most used special features of zsh, and one I've already used
|
|
a couple of times in this section, is recursive globbing, the ability to
|
|
match any directory in an arbitrarily deep (or, as we say in English,
|
|
tall) tree of directories. There are two forms: `<code>**/</code>' matches a set
|
|
of directories to any depth, including the top directory, what you get
|
|
by replacing `<code>**/</code>' by `<code>./</code>', i.e. <code>**/foo</code> can match <code>foo</code> in the
|
|
current directory, but also <code>bar/foo</code>, <code>bar/bar/bar/foo</code>,
|
|
<code>bar/bar/bar/poor/little/lambs/foo</code> and so on. `<code>***/</code>' does the same,
|
|
but follows symbolic links; this can land you in infinite loops if the
|
|
link points higher up in the same directory hierarchy --- an odd thing
|
|
to do, but it can happen.</p>
|
|
<p>The `<code>**/</code>' or `<code>***/</code>' can't appear in parentheses; there's no way of
|
|
specifying them as alternatives. As already noticed, however, the
|
|
precedence of the exclusion operator `<code>~</code>' provides a useful way of
|
|
removing matches you don't want. Remember, too, the recursion operators
|
|
don't need to be at the start of the pattern:</p>
|
|
<pre><code> print ~/**/*.txt
|
|
</code></pre>
|
|
<p>prints the name of all the files under your home directory ending with
|
|
`<code>.txt</code>'. Don't expect it to be particularly fast; it's not as well
|
|
optimised as the standard UNIX command <code>find</code>, although it is a whole
|
|
lot more convenient. The traditional way of searching a file which may
|
|
be anywhere in a directory tree is along the lines of:</p>
|
|
<pre><code> find ~/src -name '*.c' -print | xargs grep pattern
|
|
</code></pre>
|
|
<p>which is horrendously cumbersome. What's happening is that <code>find</code>
|
|
outputs a newline-separated list of all the files it finds, and <code>xargs</code>
|
|
assembles these as additional arguments to the command `<code>grep pattern</code>'. It simplifies in zsh to the much more natural</p>
|
|
<pre><code> grep pattern ~/src/**/*.c
|
|
</code></pre>
|
|
<p>In fact, strictly speaking you probably ought to use</p>
|
|
<pre><code> find ~/src -name '*.c' -print0 | xargs -0 grep pattern
|
|
</code></pre>
|
|
<p>for the other form --- this passes null-terminated strings around, which
|
|
is safer since any character other than a NUL or a slash can occur in a
|
|
filename. But of course you don't need that now.</p>
|
|
<p>Do remember that this includes the current directory in the search, so
|
|
in that last example `<code>foo.c</code>' in the directory where you typed the
|
|
command would be searched. This isn't completely obvious because of the
|
|
`<code>/</code>' in the pattern, which erroneously seems to suggest at least one
|
|
directory.</p>
|
|
<p>It's a little known fact that this is a special case of a more general
|
|
syntax, `(<em>pat</em><code>/</code>)<code>#</code>'. This syntax isn't perfect, either; it's the
|
|
only time where a `<code>/</code>' can usefully occur in parentheses. The pattern
|
|
<em>pat</em> is matched against each directory; if it succeeds, <em>pat</em> is
|
|
matched against each of the subdirectories, and so on, again to
|
|
arbitrary depth. As this uses the character `<code>#</code>', it requires the
|
|
<code>EXTENDED_GLOB</code> option, which the more common syntax doesn't, since
|
|
no-one would write two <code>*</code>'s in a row for any other reason.</p>
|
|
<p>You should consider the `<code>/</code>)' to be in effect a single pattern token;
|
|
for example in</p>
|
|
<pre><code> % print (F*|B*/)#*.txt
|
|
FOO/BAR/thingy.txt
|
|
</code></pre>
|
|
<p>both `<code>F*</code>' and `<code>B*</code>' are possible directory names, not just the
|
|
`<code>B*</code>' next to the slash. The difference between `<code>#</code>' and `<code>##</code>' is
|
|
respected here --- with the former, zero occurrences of the pattern may
|
|
be matched (i.e. `<code>*.txt</code>'), while with the latter, at least one level
|
|
of subdirectories is required. Thus `<code>(*/)##*.txt</code>' is equivalent to
|
|
`<code>*/**/*.txt</code>', except that the first `<code>*</code>' in the second pattern will
|
|
match a symbolic link to a directory; there's no way of forcing the
|
|
other syntax to follow symbolic links.</p>
|
|
<p>Fairly obviously, this syntax is only useful with files. Other uses of
|
|
patterns treat slashes as ordinary characters and `<code>**</code>' or `<code>***</code>'
|
|
the same as a single `<code>*</code>'. It's not an error to use multiple `<code>*</code>'s,
|
|
though, just pointless.</p>
|
|
<p><span id="l141"></span></p>
|
|
<h3 id="596-glob-qualifiers"><a class="header" href="#596-glob-qualifiers">5.9.6: Glob qualifiers</a></h3>
|
|
<p>Another very widely used zsh enhancement is the ability to select types
|
|
of file by using `glob qualifiers', a group of (rather terse) flags in
|
|
parentheses at the end of the pattern. Like recursive globbing, this
|
|
feature only applies for filename generation in the command line
|
|
(including an array assignment), not for other uses of patterns.</p>
|
|
<p>This feature requires the <code>BARE_GLOB_QUAL</code> option to be turned on, which
|
|
it usually is; the name implies that one day there may be another,
|
|
perhaps more ksh-like, way of doing the same thing with a more
|
|
indicative syntax than just a pair of parentheses.</p>
|
|
<p><strong>File types</strong></p>
|
|
<p>The simplest glob qualifiers are similar to what the completion system
|
|
appends at the end of file names when the <code>LIST_TYPES</code> option is on;
|
|
these are in turn similar to the indications used by `<code>ls -F</code>'. So</p>
|
|
<pre><code> % print *(.)
|
|
file1 file2 cmd1 cmd2
|
|
% print *(/)
|
|
dir1 dir2
|
|
% print *(*)
|
|
cmd1 cmd2
|
|
% print *(@)
|
|
symlink1 symlink2
|
|
</code></pre>
|
|
<p>where I've invented unlikely filenames with obvious types. <code>file1</code> and
|
|
<code>file2</code> were supposed to be just any old file; <code>(.)</code> picks up those but
|
|
also executable files. Sockets <code>(=)</code>, named pipes <code>(p)</code>, and device
|
|
files <code>(%)</code> including block <code>(%b)</code> and character <code>(%c)</code> special files
|
|
are the other types of file you can detect.</p>
|
|
<p>Associated with type, you can also specify the number of hard links to a
|
|
file: <code>(l2)</code> specifies exactly 2 links, <code>(l+3)</code> more than 3 links,
|
|
<code>(l-5)</code> fewer than 5.</p>
|
|
<p><strong>File permissions</strong></p>
|
|
<p>Actually, the <code>(*)</code> qualifier really applies to the file's permissions,
|
|
not its type, although it does require the file to be an executable
|
|
non-special file, not a directory nor anything wackier. More basic
|
|
qualifiers which apply just to the permissions of the files are <code>(r)</code>,
|
|
<code>(w)</code> and <code>(x)</code> for files readable, writeable and executable by the
|
|
owner; <code>(R)</code>, <code>(W)</code> and <code>(X)</code> correspond to those for world permissions,
|
|
while <code>(A)</code>, <code>(I)</code> and <code>(E)</code> do the job for group permissions --- sorry,
|
|
the Latin alphabet doesn't have middle case. You can specify permissions
|
|
more exactly with `<code>(f)</code>' for file permissions: the expression after
|
|
this can take various forms, but the easiest is probably a delimited
|
|
string, where the delimiters work just like the arguments for parameter
|
|
flags and the arguments, separated by commas, work just like symbolic
|
|
arguments to <code>chmod</code>; the example from the manual,</p>
|
|
<pre><code> print *(f:gu+w,o-rx:)
|
|
</code></pre>
|
|
<p>picks out files (of any type) which are writeable by the owner (`user')
|
|
and group, and neither readable nor executable by anyone else
|
|
(`other').</p>
|
|
<p><strong>File ownership</strong></p>
|
|
<p>You can match on the other three mode bits, setuid (<code>(s)</code>), setgid (<code>(S)</code>)
|
|
and sticky (<code>(t)</code>), but I'm not going to go into what those are if you
|
|
don't know; your system's manual page for <code>chmod</code> may (or may not)
|
|
explain.</p>
|
|
<p>Next, you can pick out files by owner; <code>(U)</code> and <code>(G)</code> say that you or
|
|
your group, respectively, owns the file --- really the effective user or
|
|
group ID, which is usually who you are logged in as, but this may be
|
|
altered by tricks such as a programme running setuid or setgid (the
|
|
things I'm not going to explain). More generally, <code>(u0)</code> says that the
|
|
file is owned by root and <code>(u501)</code> says it is owned by user ID 501; you
|
|
can use names if you delimit them, so <code>(u:pws:)</code> says that the owner
|
|
must be user <code>pws</code>; similarly for groups with <code>(g)</code>.</p>
|
|
<p><strong>File times</strong></p>
|
|
<p>You can also pick files by modification (<code>(m)</code>) or access (<code>(a)</code>) time,
|
|
either before (<code>(-)</code>), at, or after (<code>(+)</code>) a specific time, which may be
|
|
measured in days (the default), months (<code>(M)</code>), weeks (<code>(w)</code>), hours (<code>(h)</code>),
|
|
minutes (<code>(m)</code>) or seconds (<code>(s)</code>). These must appear in the order <code>m</code> or
|
|
<code>a</code>, optional unit, optional plus or minus, number. Hence:</p>
|
|
<pre><code> print *(m1)
|
|
</code></pre>
|
|
<p>Files that were modified one day ago --- i.e. less than 48 but more than
|
|
24 hours ago.</p>
|
|
<pre><code> print *(aw-1)
|
|
</code></pre>
|
|
<p>Files accessed within the last week, i.e. less than 7 days ago.</p>
|
|
<p>In addition to <code>(m)</code> and <code>(a)</code>, there is also <code>(c)</code>, which is sometimes
|
|
said to refer to file creation, but it is actually something a bit less
|
|
useful, namely <em>inode</em> change. The inode is the structure on disk where
|
|
UNIX-like filing systems record the information about the location and
|
|
nature of the file. Information here can change when some aspect of the
|
|
file information, such as permissions, changes.</p>
|
|
<p><strong>File size</strong></p>
|
|
<p>The qualifier <code>(L)</code> refers to the file size (`L' is actually for
|
|
length), by default in bytes, but it can be in kilobytes <code>(k)</code>,
|
|
megabytes <code>(m)</code>, or 512-byte blocks (<code>p</code>, unfortunately). Plus and minus
|
|
can be used in the same way as for times, so</p>
|
|
<pre><code> print *(Lk3)
|
|
</code></pre>
|
|
<p>gives files 3k large, i.e. larger than 2k but smaller than 4k, while</p>
|
|
<pre><code> print *(Lm+1)
|
|
</code></pre>
|
|
<p>gives files larger than a megabyte.</p>
|
|
<p>Note that file size applies to directories, too, although it's not very
|
|
useful. The size of directories is related to the number of slots for
|
|
files currently available inside the directory (at the highest level,
|
|
i.e. not counting children of children and deeper). This changes
|
|
automatically if necessary to make more space available.</p>
|
|
<p><strong>File matching properties</strong></p>
|
|
<p>There are a few qualifiers which affect option settings just for the
|
|
match in question: <code>(N)</code> turns on <code>NULL_GLOB</code>, so that the pattern
|
|
simply disappears from the command line if it fails to match; <code>(D)</code>
|
|
turns on <code>GLOB_DOTS</code>, to match even files beginning with a `<code>.</code>', as
|
|
described above; <code>(M)</code> or <code>(T)</code> turn on <code>MARK_DIRS</code> or <code>LIST_TYPES</code>, so
|
|
that the result has an extra character showing the type of a directory
|
|
only (in the first case) or of any special file (in the second); and
|
|
<code>(n)</code> turns on <code>NUMERIC_GLOB_SORT</code>, so that numbers in the filename are
|
|
sorted arithmetically --- so <code>10</code> comes after <code>1A</code>, because the 1 and 10
|
|
are compared before the next character is looked at.</p>
|
|
<p>Other than being local to the pattern qualified, there is no difference
|
|
in effect from setting the option itself.</p>
|
|
<p><strong>Combining qualifiers</strong></p>
|
|
<p>One of the reasons that some qualifiers have slightly obscure syntax is
|
|
that you can chain any number of them together, which requires that the
|
|
file has all of the given properties. In other words `<code>*(UWLk-10)</code>' are
|
|
files owned by you, world writeable and less than 10k in size.</p>
|
|
<p>You can negate a set of qualifiers by putting `<code>^</code>' in front of those,
|
|
so `<code>*(ULk-10^W)</code>' would specify the corresponding files which were not
|
|
world writeable. The `<code>^</code>' applies until the end of the flags, but you
|
|
can put in another one to toggle back to assertion instead of negation.</p>
|
|
<p>Also, you can specify alternatives; `<code>*(ULk-10,W)</code>' are files which
|
|
either are owned by you and are less than 10k, or are world writeable
|
|
--- note that the `and' has higher precedence than the `or'.</p>
|
|
<p>You can also toggle whether the assertions or negations made by
|
|
qualifiers apply to symbolic links, or the files found by following
|
|
symbolic links. The default is the former --- otherwise the <code>(@)</code>
|
|
qualifier wouldn't work on its own. By preceding qualifiers with <code>-</code>,
|
|
they will follow symbolic links. So <code>*(-/)</code> matches all directories,
|
|
including those reached by a symbolic link (or more than one symbolic
|
|
link, up to the limit allowed by your system). As with `<code>^</code>', you can
|
|
toggle this off again with another `<code>-</code>'. To repeat what I said in
|
|
<a href="zshguide03.html#syntax">chapter 3</a>, you can't distinguish between the
|
|
other sort of links, hard links, and a real file entry, because a hard
|
|
link just supplies an alternative but equivalent name for a file.</p>
|
|
<p>There's a nice trick to find broken symlinks: the pattern `<code>**/*(-@)</code>'.
|
|
This is supposed to follow symlinks; but that `<code>@</code>' tells it to match
|
|
only on symlinks! There is only one case where this can succeed, namely
|
|
where the symlink is broken. (This was pointed out to me by Oliver
|
|
Kiddle.)</p>
|
|
<p><strong>Sorting and indexing qualifiers</strong></p>
|
|
<p>Normally the result of filename generation is sorted by alphabetic order
|
|
of filename. The globbing flags <code>(o)</code> and <code>(O)</code> allow you to sort in
|
|
normal or reverse order of other things: <code>n</code> is for names, so <code>(on)</code>
|
|
gives the default behaviour while <code>(On)</code> is reverse order; <code>L</code>, <code>l</code>,
|
|
<code>m</code>, <code>a</code> and <code>c</code> refer to the same thing as the normal flags with those
|
|
letters, i.e. file size, number of links, and modification, access and
|
|
inode change times. Finally, <code>d</code> refers to subdirectory depth; this is
|
|
useful with recursive globbing to show a file tree ordered depth-first
|
|
(subdirectory contents appear before files in any given directory) or
|
|
depth-last.</p>
|
|
<p>Note that time ordering produces the most recent first as the standard
|
|
ordering (<code>(om)</code>, etc.), and oldest first as the reverse ordering
|
|
(<code>(Om)</code>, etc.). With size, smallest first is the normal ordering.</p>
|
|
<p>You can combine ordering criteria, with the most important coming first;
|
|
each criterion must be preceded by <code>o</code> or <code>O</code> to distinguish it from an
|
|
ordinary globbing flag. Obviously, <code>n</code> serves as a complete
|
|
discriminator, since no two different files can have the same name, so
|
|
this must appear on its own or last. But it's a good idea, when doing
|
|
depth-first ordering, to use <code>odon</code>, so that files at a particular depth
|
|
appear in alphabetical order of names. Try</p>
|
|
<pre><code> print **/*(odon)
|
|
</code></pre>
|
|
<p>to see the effect, preferably somewhere above a fairly shallow directory
|
|
tree or it will take a long time.</p>
|
|
<p>There's an extra trick you can play with ordered files, which is to
|
|
extract a subset of them by indexing. This works just like arrays, with
|
|
individual elements and slices.</p>
|
|
<pre><code> print *([1])
|
|
</code></pre>
|
|
<p>This selects a single file, the first in alphabetic order since we
|
|
haven't changed the default ordering.</p>
|
|
<pre><code> print *(om[1,5])
|
|
</code></pre>
|
|
<p>This selects the five most recently modified files (or all files, if
|
|
there are five or fewer). Negative indices are understood, too:</p>
|
|
<pre><code> print *(om[1,-2])
|
|
</code></pre>
|
|
<p>selects all files but the oldest, assuming there are at least two.</p>
|
|
<p>Finally, a reminder that you can stick modifiers after qualifiers, or
|
|
indeed in parentheses without any qualifiers:</p>
|
|
<pre><code> print **/*(On:t)
|
|
</code></pre>
|
|
<p>sorts files in subdirectories into reverse order of name, but then
|
|
strips off the directory part of that name. Modifiers are applied right
|
|
at the end, after all file selection tasks.</p>
|
|
<p><strong>Evaluating code as a test</strong></p>
|
|
<p>The most complicated effect is produced by the <code>(e)</code> qualifier, which is
|
|
followed by a string delimited in the now-familiar way by either
|
|
matching brackets of any of the four sorts or a pair of any other
|
|
characters. The string is evaluated as shell code; another layer of
|
|
quotes is stripped off, to make it easier to quote the code from
|
|
immediate expansion. The expression is evaluated separately for each
|
|
match found by the other parts of the pattern, with the parameter
|
|
<code>$REPLY</code> set to the filename found.</p>
|
|
<p>There are two ways to use <code>(e)</code>. First, you can simply rely on the
|
|
return code. So:</p>
|
|
<pre><code>
|
|
print *(e:'[[ -d $REPLY ]]':)
|
|
print *(/)
|
|
</code></pre>
|
|
<p>are equivalent. Note the quotes around the expression, which are
|
|
necessary in addition to the delimiters (here `<code>:</code>') for expressions
|
|
with special characters or whitespace. In particular, <code>$REPLY</code> would
|
|
have been evaluated too early --- before file generation took place ---
|
|
if it hadn't been quoted.</p>
|
|
<p>Secondly, the function can alter the value of <code>$REPLY</code> to alter the name
|
|
of the file. What's more, the expression can set <code>$reply</code> (which
|
|
overrides the use of <code>$REPLY</code>) to an array of files to be inserted into
|
|
the command line; it may be any size from zero items upward.</p>
|
|
<p>Here's the example in the manual:</p>
|
|
<pre><code> print *(e:'reply=(${REPLY}{1,2})':)
|
|
</code></pre>
|
|
<p>Note the string is delimited by colons <em>and</em> quoted. This takes each
|
|
file in the current directory, and for each returns a match which has
|
|
two entries, the filename with `<code>1</code>' appended and the filename with
|
|
`<code>2</code>' appended.</p>
|
|
<p>For anything more complicated than this, you should write a shell
|
|
function to use <code>$REPLY</code> and set that or <code>$reply</code>. Then you can replace
|
|
the whole expression in quotes with that name.</p>
|
|
<p><span id="l142"></span></p>
|
|
<h3 id="597-globbing-flags-alter-the-behaviour-of-matches"><a class="header" href="#597-globbing-flags-alter-the-behaviour-of-matches">5.9.7: Globbing flags: alter the behaviour of matches</a></h3>
|
|
<p>Another <code>EXTENDED_GLOB</code> feature is `globbing flags'. These are a bit
|
|
like the flags that can appear in perl regular expressions; instead of
|
|
making an assertion about the type of the resulting match, like glob
|
|
qualifiers do, they affect the way the match is performed. Thus they are
|
|
available for all uses of pattern matching --- though some flags are not
|
|
particularly useful with filename generation.</p>
|
|
<p>The syntax is borrowed from perl, although it's not the same: it looks
|
|
like `<code>(#X)</code>', where <code>X</code> is a letter, possibly followed by an argument
|
|
(currently only a number and only if the letter is `<code>a</code>'). Perl
|
|
actually uses `<code>?</code>' instead of `<code>#</code>'; what these have in common is
|
|
that they can't appear as valid pattern characters just after an open
|
|
parenthesis, since they apply to the pattern before. Zsh doesn't have
|
|
the rather technical flags that perl does (lookahead assertions and so
|
|
on); not surprisingly, its features are based around the shortcuts often
|
|
required by shell users.</p>
|
|
<p><strong>Mixed-case matches</strong></p>
|
|
<p>The simplest sort of globbing flag will serve as an example. You can
|
|
make a pattern, or a portion of a pattern, match case-insensitively with
|
|
the flag <code>(#i)</code>:</p>
|
|
<pre><code> [[ FOO = foo ]]
|
|
[[ FOO = (#i)foo ]]
|
|
</code></pre>
|
|
<p>Assuming you have <code>EXTENDED_GLOB</code> set so that the `<code>#</code>' is an active
|
|
pattern character, the first match fails while the second succeeds. I
|
|
mentioned portions of a pattern. You can put the flags at any point in
|
|
the pattern, and they last to the end either of the pattern or any
|
|
enclosing set of parentheses, so in</p>
|
|
<pre><code> [[ FOO = f(#i)oo ]]
|
|
[[ FOO = F(#i)oo ]]
|
|
</code></pre>
|
|
<p>once more the first match fails and the second succeeds. Alternatively,
|
|
you can put them in parentheses to limit their scope:</p>
|
|
<pre><code> [[ FOO = ((#i)fo)o ]]
|
|
[[ FOO = ((#i)fo)O ]]
|
|
</code></pre>
|
|
<p>gives a failure then a success again. Note that you need extra
|
|
parentheses; the ones around the flag just delimit that, and have no
|
|
grouping effect. This is different from Perl.</p>
|
|
<p>There are two flags which work in exactly the same way: <code>(#l)</code> says that
|
|
only lowercase letters in the pattern match case-insensitively;
|
|
uppercase letters in the pattern only match uppercase letters in the
|
|
test string. This is a little like Emacs' behaviour when searching case
|
|
insensitively with the <code>case-fold-search</code> option variable set; if you
|
|
type an uppercase character, it will look only for an uppercase
|
|
character. However, Emacs has the additional feature that from that
|
|
point on the whole string becomes case-sensitive; zsh doesn't do that,
|
|
the flag applies strictly character by character.</p>
|
|
<p>The third flag is <code>(#I)</code>, which turns case-insensitive matching off from
|
|
that point on. You won't often need this, and you can get the same
|
|
effect with grouping --- unless you are applying the case-insensitive
|
|
flag to multiple directories, since groups can't span more than one
|
|
directory. So</p>
|
|
<pre><code> print (#i)/a*/b*/(#I)c*
|
|
</code></pre>
|
|
<p>is equivalent to</p>
|
|
<pre><code> print /[aA]*/[bB]*/c*
|
|
</code></pre>
|
|
<p>Note that case-insensitive searching only applies to characters not in a
|
|
special pattern of some sort. In particular, ranges are not
|
|
automatically made case-insensitive; instead of `<code>(#i)[ab]*</code>', you must
|
|
use `<code>[abAB]*</code>'. This may be unexpected, but it's consistent with how
|
|
other flags, notably approximation, work.</p>
|
|
<p>You should be careful with matching multiple directories
|
|
case-insensitively. First,</p>
|
|
<pre><code> print (#i)~/.Z*
|
|
</code></pre>
|
|
<p>doesn't work. This is due to the order of expansions: filename expansion
|
|
of the tilde happens before pattern matching is ever attempted, and the
|
|
`<code>~</code>' isn't at the start where filename expansion needs to find it.
|
|
It's interpreted as an empty string which doesn't match `<code>/.Z*</code>',
|
|
case-insensitively --- in other words, it will match any empty string.</p>
|
|
<p>Hence you should put `<code>(#i)</code>' and any other globbing flags after the
|
|
first slash --- unless, for some reason, you <em>really</em> want the
|
|
expression to match `<code>/Home/PWS/</code>' etc. as well as `<code>/home/pws</code>'.</p>
|
|
<p>Second,</p>
|
|
<pre><code> print (#i)$HOME/.Z*
|
|
</code></pre>
|
|
<p>does work --- prints all files beginning `<code>.Z</code>' or `<code>.z</code>' in your home
|
|
directory --- but is inefficient. Assume <code>$HOME</code> expands to my home
|
|
directory, <code>/home/pws</code>. Then you are telling the shell it can match in
|
|
the directories `<code>/Home/PWS/</code>', `<code>/HOME/pWs</code>' and so on. There's no
|
|
quick way of doing this --- the shell has to look at every single entry
|
|
first in `<code>/</code>' and then in `<code>/home</code>' (assuming that's the only match
|
|
at that level) to check for matches. In summary, it's a good idea to use
|
|
the fact that the flag doesn't have to be at the beginning, and write
|
|
this as:</p>
|
|
<pre><code> print ~/(#i).Z*
|
|
</code></pre>
|
|
<p>Of course,</p>
|
|
<pre><code> print ~/.[zZ]*
|
|
</code></pre>
|
|
<p>would be easier and more standard in this oversimplified example.</p>
|
|
<p>On <code>Cygwin</code>, a UNIX-like layer running on top of, uh, a well known
|
|
graphical user interface claiming to be an operating system, filenames
|
|
are usually case insensitive anyway. Unfortunately, while Cygwin itself
|
|
is wise to this fact, zsh isn't, so it will do all that extra searching
|
|
when you give it the <code>(#i)</code> flag with an otherwise explicit string.</p>
|
|
<p>A piece of good news, however, is that matching of uppercase and
|
|
lowercase characters will handle non-ASCII character sets, provided your
|
|
system handles locales (or, to use the standard hieroglyphics, `i18n'
|
|
--- count the letters between `i' and `n' in `internationalization',
|
|
which may not even be a word anyway, and wince). In that case you or
|
|
your system administrator or the shell environment supplied by your
|
|
operating system vendor needs to set <code>$LC_ALL</code> or <code>$LC_CTYPE</code> to the
|
|
appropriate locale --- <code>C</code> for the default, <code>en</code> for English, <code>uk</code> for
|
|
Ukrainian (which I remember because it's confusing in the United
|
|
Kingdom), and so on.</p>
|
|
<p><strong>`Backreferences'</strong></p>
|
|
<p>The feature labelled as `backreferences' in the manual isn't really
|
|
that at all, which is my fault. Many regular expression matchers allow
|
|
you to refer back to bits already matched. For example, in Perl the
|
|
regular expression `<code>([A-Z]{3})$1</code>' says `match three uppercase
|
|
characters followed by the same three characters again'. The `<code>$1</code>' is a
|
|
backreference.</p>
|
|
<p>Zsh has a similar feature, but in fact you can't use it while matching a
|
|
single pattern; it just makes the characters matched by parentheses
|
|
available after a successful complete match. In this, it's a bit more
|
|
like Emacs's <code>match-beginning</code> and <code>match-end</code> functions.</p>
|
|
<p>You have to turn it on for each pattern with the globbing flag
|
|
`<code>(#b)</code>'. The reason for this is that it makes matches involving
|
|
parentheses a bit slower, and most of the time you use parentheses just
|
|
for ordinary filename generation where this feature isn't useful. Like
|
|
most of the other globbing flags, it can have a local effect: only
|
|
parentheses after the flag produce backreferences, and the effect is
|
|
local to enclosing parentheses (which don't feel the effect themselves).
|
|
You can also turn it off with `<code>(#B)</code>'.</p>
|
|
<p>What happens when a pattern with active parentheses matches is that the
|
|
elements of the array <code>$match</code>, <code>$mbegin</code> and <code>$mend</code> are set to reflect
|
|
each active parenthesis in turn --- names inspired by the corresponding
|
|
Emacs feature. The string matching the first pair of parentheses is
|
|
stored in the first element of <code>$match</code>, its start position in the
|
|
string is stored in the first element of <code>$mbegin</code>, and its end position
|
|
in the string <code>$mend</code>. The same happens for later matched parentheses.
|
|
The parentheses around any globbing flags do not count.</p>
|
|
<p><code>$mbegin</code> and <code>$mend</code> use the indexing convention currently in effect,
|
|
i.e. zero offset if <code>KSH_ARRAYS</code> is set, unit offset otherwise. This
|
|
means that if the string matched against is stored in the parameter
|
|
<code>$teststring</code>, then it will always be true that <code>${match[1]}</code> is the
|
|
same string as <code>${teststring[${mbegin[1]},${mend[1]}]}</code>, and so on. (I'm
|
|
assuming, as usual, that <code>KSH_ARRAYS</code> isn't set.) Unfortunately, this is
|
|
different from the way the <code>E</code> parameter flag works --- that substitutes
|
|
the character after the end of the matched substring. Sorry! It's my
|
|
fault for not following that older convention; I thought the string
|
|
subscripting convention was more relevant.</p>
|
|
<p>An obvious use for this is to match directory and non-directory parts of
|
|
a filename:</p>
|
|
<pre><code> local match mbegin mend
|
|
if [[ /a/file/name = (#b)(*)/([^/]##) ]]; then
|
|
print -l ${match[1]} ${match[2]}
|
|
fi
|
|
</code></pre>
|
|
<p>prints `<code>/a/file</code>' and `<code>name</code>'. The second parenthesis matches a
|
|
slash followed by any number of characters, but at least one, which are
|
|
not slashes, while the first matches anything --- remember slashes
|
|
aren't special in a pattern match of this form. Note that if this
|
|
appears in a function, it is a good idea to make the three parameters
|
|
local. You don't have to clear them, or even make them arrays. If the
|
|
match fails, they won't be touched.</p>
|
|
<p>There's a slightly simpler way of getting information about the match:
|
|
the flag <code>(#m)</code> puts the matched string, the start index, and the end index
|
|
for the <em>whole</em> match into the scalars <code>$MATCH</code>, <code>$MBEGIN</code> and <code>$MEND</code>.
|
|
It may not be all that obvious why this is useful. Surely the whole
|
|
pattern always matches the whole string? Actually, you've already seen
|
|
cases where this isn't true for parameter substitutions:</p>
|
|
<pre><code> local MATCH MBEGIN MEND string
|
|
string=aLOha
|
|
: ${(S)string##(#m)([A-Z]##)}
|
|
</code></pre>
|
|
<p>You'll find this sets <code>$MATCH</code> to <code>LO</code>, <code>$MBEGIN</code> to 2 and <code>$MEND</code> to 3.
|
|
In the parameter expansion, the <code>(S)</code> is for matching substrings, so
|
|
that the `<code>##</code>' match isn't anchored to the start of <code>$string</code>. The
|
|
pattern is <code>(#m)([A-Z]##)</code>, which means: turn on full-match
|
|
backreferencing and match any number of capital letters, but at least
|
|
one. This matches <code>LO</code>. Then the match parameters let you see where in
|
|
the test parameter the match occurred.</p>
|
|
<p>There's nothing to stop you using both these types of backreferences at
|
|
once, and you can specify multiple globbing flags in the short form
|
|
`<code>(#bm)</code>'. This will work with any combination of flags, except that
|
|
some such as `<code>(#bB)</code>' are obviously silly.</p>
|
|
<p>Because ordinary globbing produces a list of files, rather than just
|
|
one, this feature isn't very useful and is turned off. However, it <em>is</em>
|
|
possible to use backreferences in global substitutions and substitutions
|
|
on arrays; here are both at once:</p>
|
|
<pre><code> % array=(mananan then in gone June)
|
|
% print ${array//(#m)?n/${(C)MATCH[1]}n}
|
|
mAnAnAn thEn In gOne JUne
|
|
</code></pre>
|
|
<p>The substitution occurs separately on each element of the array, and at
|
|
each match in each element <code>$MATCH</code> gets set to what was matched. We use
|
|
this to capitalize every character that is followed by a lowercase
|
|
`<code>n</code>'. This will work with the <code>(#b)</code> form, too. The perl equivalent of
|
|
this is:</p>
|
|
<pre><code> % perl -pe 's/.n/\u$&amp;/g' &lt;&lt;&lt;$array
|
|
mAnAnAn thEn In gOne JUne
|
|
</code></pre>
|
|
<p>(People sometimes say Perl has a difficult syntax to understand; I hope
|
|
I'm convincing you how naive that view is when you have zsh.)</p>
|
|
<p>Now I can convince you of one point I made about excluded matches above:</p>
|
|
<pre><code> % [[ foo = (#b)(^foo)* ]] && print $match
|
|
fo
|
|
</code></pre>
|
|
<p>As claimed, the process of making the longest possible match, then
|
|
backtracking from the end until the whole thing is successful, leads to
|
|
the `<code>(^foo)</code>' matching `<code>fo</code>'.</p>
|
|
<p><strong>Approximate matching</strong></p>
|
|
<p>To my knowledge, zsh is the first command line interpreter to make use
|
|
of approximate matching. This is very useful because it provides the
|
|
shell with an easy way of correcting what you've typed. First, some
|
|
basics about what I mean by `approximate matching'.</p>
|
|
<p>There are four ways you can make a mistake in typing. You can leave out
|
|
a letter which should be there; you can insert a letter which shouldn't;
|
|
you can type one letter instead of another; and you can transpose two
|
|
letters. The last one involves two different characters, so some systems
|
|
for making approximate matches count it as two different errors; but
|
|
it's a particularly common one when typing, and quite useful to be able
|
|
to handle as a single error. I know people who even have `<code>mkae</code>'
|
|
aliased to `<code>make</code>'.</p>
|
|
<p>You can tell zsh how many errors you are willing to allow in a pattern
|
|
match by using, for example, <code>(#a1)</code>, which says only a single error is
|
|
allowed. The rules for the flag are almost identical to those for
|
|
case-insensitive matching, in particular for scoping and the way
|
|
approximate matching is handled for a filename expansion with more than
|
|
one directory. The number of errors is global; if the shell manages to
|
|
match a directory in a path with an error, one fewer error is allowed
|
|
for the rest of the path. You can specify as many errors as you like;
|
|
the practical limit is that with too many allowed errors the pattern
|
|
will match far too many strings. The shell doesn't have a particularly
|
|
nifty way of handling approximate matching (unlike, for example, the
|
|
program <code>agrep</code>), but you are unlikely to encounter problems if the
|
|
number of matches stays in a useful range.</p>
|
|
<p>The fact that the error count applies to the whole of a filename path is
|
|
a bit of a headache, actually, because we have to make sure the shell
|
|
matches each directory with the minimum number of errors. With a single
|
|
pattern, the shell doesn't care as long as it doesn't use up all the
|
|
errors it has, while with multiple directories if it uses up the errors
|
|
early on, it may fail to match something it should match. But you don't
|
|
have to worry about that; this explanation is just to elicit sympathy.</p>
|
|
<p>So the pattern <code>(#a1)README</code> will match <code>README</code>, <code>READ.ME</code>, <code>READ_ME</code>,
|
|
<code>LEADME</code>, <code>REDME</code>, <code>READEM</code>, and so on. It will not match <code>_README_</code>,
|
|
<code>ReadMe</code>, <code>READ</code> or <code>AAREADME</code>. However, you can combine it with
|
|
case-insensitivity, for which the short form <code>(#ia1)README</code> is allowed,
|
|
and then it will match <code>ReadMe</code>, <code>Read.Me</code>, <code>read_me</code>, and so on. You
|
|
can consider filenames with multiple directories as single strings for
|
|
this purpose --- with one exception, that `<code>foo/bar</code>' and `<code>fo/obar</code>'
|
|
are two errors apart, not one. Because the errors are counted separately
|
|
in each directory, you can't transpose the `<code>/</code>' with another
|
|
character. This restriction doesn't apply in other forms of pattern
|
|
matching where <code>/</code> is not a special character.</p>
|
|
<p>Another feature in common with case-insensitive matching is that only the
|
|
literal parts of the string are handled. So if you have `<code>[0-9]</code>' in a
|
|
pattern, that character must match a decimal digit even if approximation
|
|
is active. This is often useful to impose a particular form at key
|
|
points. The main difficulty, as with the `<code>/</code>' in a directory, is that
|
|
transposing with another character is not allowed, either. In other
|
|
words, `<code>(#a1)ab[0-9]</code>' will fail to match `<code>a1b</code>'; it will match with
|
|
two errors, by removing the `<code>b</code>' before the digit and inserting it
|
|
after.</p>
|
|
<p>As an example of what you can do with this feature, here is a simple
|
|
function to correct misspelled filenames.</p>
|
|
<pre><code> emulate -LR zsh
|
|
setopt extendedglob
|
|
|
|
local file trylist
|
|
integer approx max_approx=6
|
|
|
|
file=$1
|
|
|
|
if [[ -e $file ]]; then
|
|
# no correction necessary
|
|
print $file
|
|
return
|
|
fi
|
|
|
|
for (( approx = 1; approx &lt;= max_approx; approx++ )); do
|
|
trylist=( (#a$approx)"$file"(N) )
|
|
(( $#trylist )) && break
|
|
done
|
|
(( $#trylist )) || return 1
|
|
|
|
print $trylist
|
|
</code></pre>
|
|
<p>The function tries to match a file with the minimum possible number of
|
|
errors, but in any case no more than 6. As soon as it finds a match, it
|
|
will print it out and exit. It's still possible there is more than one
|
|
match with that many errors, however, and in this case the complete list
|
|
is printed. The function doesn't handle `<code>~</code>' in the filename.</p>
|
|
<p>It does illustrate the fact that you can specify the number of
|
|
approximations as a parameter. This is purely a consequence of the fact
|
|
that filename generation happens right at the end of the expansion
|
|
sequence, after the parameters have already been substituted away. The
|
|
numbers and the letter in the globbing flag aren't special characters,
|
|
unlike the parentheses and the `<code>#</code>'; if you wanted those to be special
|
|
when substituted from a parameter, you would need to set the <code>GLOB_SUBST</code>
|
|
flag, possibly by using the `<code>~</code>' parameter flag.</p>
|
|
<p>A more complicated version of that function is included with the shell
|
|
distribution in the file <code>Completion/Base/Widget/_correct_filename</code>.
|
|
This is designed to be used either on its own, or as part of the
|
|
completion system.</p>
|
|
<p>Indeed, the completion system described in the next chapter is where you
|
|
are most likely to come across approximate matching, buried inside
|
|
approximate completion and correction --- in the first case, you tell
|
|
the shell to complete what you have typed, trying to correct mistakes,
|
|
and in the second case, you tell the shell that you have finished typing
|
|
but possibly made some mistakes which it should correct. If you already
|
|
have the new completion system loaded, you can use <code>^Xc</code> to correct a
|
|
word on the command line; this is context-sensitive, so more
|
|
sophisticated than the function I showed.</p>
|
|
<p><strong>Anchors</strong></p>
|
|
<p>The last two globbing flags are probably the least used. They are there
|
|
really for completeness. They are <code>(#s)</code>, to match only at the start of
|
|
a string, and <code>(#e)</code>, to match only at the end. Unlike the other flags
|
|
they are purely local, just making a statement about the point where
|
|
they occur in the pattern.</p>
|
|
<p>They correspond to the much more commonly used `<code>^</code>' and `<code>$</code>' in
|
|
regular expressions. The difference is that shell patterns nearly always
|
|
match a complete string, so telling the pattern that a certain point is
|
|
the start or end isn't usually very useful. There are two occasions when
|
|
it is. The first is when the start or end is to be matched as an
|
|
alternative to something else. For example,</p>
|
|
<pre><code> [[ $file = *((#s)|/)dirpart((#e)|/)* ]]
|
|
</code></pre>
|
|
<p>succeeds if <code>dirpart</code> is a complete path segment of <code>$file</code> --- with a
|
|
slash or nothing at all before and after it. Remember, once again, that
|
|
slashes aren't special in pattern matches unless they're performing
|
|
filename generation. The effect of these two flags isn't altered at all
|
|
by their being inside another set of parentheses.</p>
|
|
<p>The second time these are useful is in parameter matches where the
|
|
pattern is not guaranteed to match a complete string. If you use <code>(#s)</code>
|
|
or <code>(#e)</code>, it will force that point to be the start or end despite the
|
|
operator in use. So <code>${</code><em>param</em><code>##</code><em>pattern</em><code>(#e)}</code> will remove
|
|
<em>pattern</em> from <code>$</code><em>param</em> only if it matches the entire string: the <code>##</code>
|
|
must match at the head, while the <code>(#e)</code> must match at the end.</p>
|
|
<p>You can get the same effect with <code>${</code><em>param</em><code>:#</code><em>pattern</em><code>}</code>, and
|
|
furthermore this is rather faster. The <code>:#</code> operator has some global knowledge
|
|
about how to match; it knows that since <em>pattern</em> will match as far as
|
|
it can along the test string, it only needs to try the match once.
|
|
However, since `<code>##</code>' just needs to match at the head of the string, it
|
|
will backtrack along the pattern, trying to match <em>pattern</em><code>(#e)</code>,
|
|
entirely heedless of the fact that the pattern itself specifically won't
|
|
match if it doesn't extend to the end. So it's more efficient to use the
|
|
special parameter operators whenever they're available.</p>
|
|
<p><span id="l143"></span></p>
|
|
<h3 id="598-the-function-zmv"><a class="header" href="#598-the-function-zmv">5.9.8: The function <code>zmv</code></a></h3>
|
|
<p>The shell is supplied with a function <code>zmv</code>, which may have been
|
|
installed into the default <code>$fpath</code>, or can be found in the source tree
|
|
in the directory <code>Functions/Misc</code>. This provides a way of renaming,
|
|
copying and linking files based on patterns. The idea is that you give
|
|
two arguments, a pattern to match, and a string which uses that pattern.
|
|
The pattern to match has backreferences turned on; these are stored in
|
|
the positional parameters to make them easy to refer to. The function
|
|
tries to be safe: any file whose name is not changed is simply ignored,
|
|
and usually overwriting an existing file is an error, too. However, it
|
|
doesn't make sure that there is a one to one mapping from source to
|
|
target files; it doesn't know if the target file is supposed to be a
|
|
directory (though it could be smarter about that).</p>
|
|
<p>In the examples, I will use the option <code>-n</code>, which forces <code>zmv</code> to print
|
|
out what it will do without actually doing it. This is a good thing to
|
|
try first if you are unsure.</p>
|
|
<p>Here's a simple example.</p>
|
|
<pre><code> % ls
|
|
foo
|
|
% zmv -n '(*)' '${(U)1}'
|
|
mv -- foo FOO
|
|
</code></pre>
|
|
<p>The pattern matches anything in the current directory, excluding files
|
|
beginning with a `<code>.</code>' (the function starts with an `<code>emulate</code>', so
|
|
<code>GLOB_DOTS</code> is forced to be off). The complete string is stored as the
|
|
first backreference, which is in turn put into <code>$1</code>. Then the second
|
|
argument is used and <code>$1</code> in uppercase is substituted.</p>
|
|
<p><strong>Essentials of the function</strong></p>
|
|
<p>The basic code in <code>zmv</code> is very simple. It boils down to more or less
|
|
the following.</p>
|
|
<pre><code> setopt nobareglobqual extendedglob
|
|
local files pattern result f1 f2 match mbegin mend
|
|
|
|
pattern=$1
|
|
result=$2
|
|
|
|
for f1 in ${~pattern}; do
|
|
[[ $f1 = (#b)${~pattern} ]] || continue
|
|
set -- $match
|
|
f2=${(e)result}
|
|
mv -- $f1 $f2
|
|
done
|
|
</code></pre>
|
|
<p>Here's what's going on. We store the arguments as <code>$pattern</code> and
|
|
<code>$result</code>. We then expand the pattern to a list of files --- remember
|
|
that <code>${~pattern}</code> makes the characters in <code>$pattern</code> active for the
|
|
purposes of globbing. For each file we find, we match against the
|
|
pattern again, but this time with backreferences turned on, so that
|
|
parentheses are expanded into the array <code>$match</code>. If, for some reason,
|
|
the pattern match failed this time, we just skip the file. Then we store
|
|
<code>$match</code> in the positional parameters; the `<code>--</code>' for <code>set</code> and for
|
|
<code>mv</code> is in case <code>$match[1]</code> begins with a `<code>-</code>'.</p>
|
|
<p>Then we evaluate the result, assuming that it will refer to the
|
|
positional parameters. In our example, <code>$result</code> contains the argument
|
|
`<code>${(U)1}</code>' and if we matched `<code>foo</code>', then <code>$1</code> contains <code>foo</code>. The
|
|
effect of `<code>${(e)result}</code>' is to perform an extra substitution on the
|
|
<code>${(U)1}</code>, so <code>$f2</code> will be set to <code>FOO</code>. Finally, we use the <code>mv</code>
|
|
command to do the actual renaming. The effect of the <code>-n</code> option isn't
|
|
shown, but it's essentially to put a `<code>print</code>' in front of the <code>mv</code>
|
|
command line.</p>
|
|
<p>Notice I set <code>nobareglobqual</code>, turning off the use of glob qualifiers.
|
|
That's necessary because of all those parentheses; otherwise, `<code>(*)</code>'
|
|
would have been interpreted as a qualifier. There is an option, <code>-Q</code>,
|
|
which will turn qualifiers back on, if you need them. That's still not
|
|
quite ideal, since the second pattern match, the one where we actually
|
|
use the backreferences, isn't filename generation, just a test against a
|
|
string, so doesn't handle glob qualifiers. So in that case the code needs
|
|
to strip qualifiers off. It does this by a fairly simple pattern match
|
|
which will work in simple cases, though you can confuse it if you try
|
|
hard enough, particularly if you have extra parentheses in the glob
|
|
qualifier.</p>
|
|
<p>Note also the use of `<code>${(e)result}</code>' to force substitution of
|
|
parameters when <code>$result</code> is evaluated. This way of doing it safely
|
|
ignores other metacharacters which may be around: all <code>$</code>-expansions,
|
|
plus backquote expansion, are performed, but otherwise <code>$result</code> is left
|
|
alone.</p>
|
|
<p><strong>More complicated examples</strong></p>
|
|
<p><code>zmv</code> has some special handling for recursive globbing, but only with
|
|
the patterns <code>**/</code> and <code>***/</code>. If you put either of these in parentheses
|
|
in the pattern, they will be spotted and used in the normal way. Hence,</p>
|
|
<pre><code> % ls test
|
|
lonely
|
|
% zmv -n '(**/)lonely' '$1solitary'
|
|
mv -- test/lonely test/solitary
|
|
</code></pre>
|
|
<p>Note that, as with other uses of `<code>**/</code>', the slash is part of the
|
|
recursive match, so you don't need another one. You don't need to
|
|
separate <code>$1</code> from <code>solitary</code> either, since positional parameters are a
|
|
special case, but you could use `<code>${1}solitary</code>' for the look of it.
|
|
Like glob qualifiers, recursive matches are handled by some magic in the
|
|
function; in ordinary globbing you can't put these two forms inside
|
|
parentheses.</p>
|
|
<p>For the lazy, the option <code>-w</code> (which means `with wildcards') will tell
|
|
<code>zmv</code> to decide for itself where all the patterns are and automatically
|
|
add parentheses. The two examples so far become</p>
|
|
<pre><code> zmv -nw '*' '${(U)1}'
|
|
zmv -nw '**/lonely' '$1solitary'
|
|
</code></pre>
|
|
<p>with exactly the same effects.</p>
|
|
<p>Another way of getting useful effects is to use the `<code>${1//foo/bar}</code>'
|
|
substitution in the second argument. This gives you a general way of
|
|
substituting bits in filenames. Often, you can then get away with having
|
|
`<code>(*)</code>' as the first argument:</p>
|
|
<pre><code> zmv '(*)' '${1//(#m)[aeiou]/${(U)MATCH}}'
|
|
</code></pre>
|
|
<p>capitalises all the vowels in all filenames in the current directory.
|
|
You may be familiar with a perl script called <code>rename</code> which does tricks
|
|
like this (though there's another, less powerful, programme of the same
|
|
name which simply replaces strings).</p>
|
|
<p><strong>The effect of <code>zmv</code></strong></p>
|
|
<p>In addition to renaming, <code>zmv</code> can be made to copy or link files. If you
|
|
call it <code>zcp</code> or <code>zln</code> instead of <code>zmv</code>, it will have those effects, and
|
|
in the case of <code>zln</code> you can use the option <code>-s</code> to create symbolic
|
|
links, just as with <code>ln</code>. Beware the slightly confusing behaviour of
|
|
symbolic links containing relative directories, however.</p>
|
|
<p>Alternatively, you can force the behaviour of <code>zmv</code>, <code>zcp</code> and <code>zln</code> just
|
|
by giving the options <code>-M</code>, <code>-C</code> or <code>-L</code> to the function, whatever it is
|
|
called. Or you can use `<code>-p</code> <em>prog</em>' to execute <code>prog</code> instead of <code>mv</code>,
|
|
<code>cp</code> or <code>ln</code>; <em>prog</em> should be able to be run as `<em>prog</em> <code>--</code>
|
|
<em>oldname</em> <em>newname</em>', whatever it does.</p>
|
|
<p>The option <code>-i</code> works a bit like the same option to the basic programmes
|
|
which <code>zmv</code> usually calls, prompting you before any action --- in this
|
|
case, not just overwriting, but any action at all. Likewise, <code>-f</code> tells
|
|
<code>zmv</code> to force overwriting of files, which it will usually refuse to do
|
|
because of the potential dangers. Although many versions of <code>mv</code> etc.
|
|
take this option, some don't, so it's not passed down; instead there's a
|
|
generic way of passing down options to the programmes executed, using
|
|
<code>-o</code> followed by a string. For example,</p>
|
|
<pre><code> % ls
|
|
foo
|
|
% zmv -np frud -o'-a -b' '(*)' '${(U)1}'
|
|
frud -a -b -- foo FOO
|
|
</code></pre>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
<a rel="prev" href="zshguide04.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next" href="zshguide06.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
<a rel="prev" href="zshguide04.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next" href="zshguide06.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script type="text/javascript">
|
|
window.playground_copyable = true;
|
|
</script>
|
|
|
|
|
|
<script src="elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="mark.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="searcher.js" type="text/javascript" charset="utf-8"></script>
|
|
|
|
<script src="clipboard.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="highlight.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="book.js" type="text/javascript" charset="utf-8"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
|
|
|
|
</body>
|
|
</html>
|