Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Conversion pipeline templates. |
2 | ||
3 | The problem: | |
4 | ------------ | |
5 | ||
6 | Suppose you have some data that you want to convert to another format, | |
7 | such as from GIF image format to PPM image format. Maybe the | |
8 | conversion involves several steps (e.g. piping it through compress or | |
9 | uuencode). Some of the conversion steps may require that their input | |
10 | is a disk file, others may be able to read standard input; similar for | |
11 | their output. The input to the entire conversion may also be read | |
12 | from a disk file or from an open file, and similar for its output. | |
13 | ||
14 | The module lets you construct a pipeline template by sticking one or | |
15 | more conversion steps together. It will take care of creating and | |
16 | removing temporary files if they are necessary to hold intermediate | |
17 | data. You can then use the template to do conversions from many | |
18 | different sources to many different destinations. The temporary | |
19 | file names used are different each time the template is used. | |
20 | ||
21 | The templates are objects so you can create templates for many | |
22 | different conversion steps and store them in a dictionary, for | |
23 | instance. | |
24 | ||
25 | ||
26 | Directions: | |
27 | ----------- | |
28 | ||
29 | To create a template: | |
30 | t = Template() | |
31 | ||
32 | To add a conversion step to a template: | |
33 | t.append(command, kind) | |
34 | where kind is a string of two characters: the first is '-' if the | |
35 | command reads its standard input or 'f' if it requires a file; the | |
36 | second likewise for the output. The command must be valid /bin/sh | |
37 | syntax. If input or output files are required, they are passed as | |
38 | $IN and $OUT; otherwise, it must be possible to use the command in | |
39 | a pipeline. | |
40 | ||
41 | To add a conversion step at the beginning: | |
42 | t.prepend(command, kind) | |
43 | ||
44 | To convert a file to another file using a template: | |
45 | sts = t.copy(infile, outfile) | |
46 | If infile or outfile are the empty string, standard input is read or | |
47 | standard output is written, respectively. The return value is the | |
48 | exit status of the conversion pipeline. | |
49 | ||
50 | To open a file for reading or writing through a conversion pipeline: | |
51 | fp = t.open(file, mode) | |
52 | where mode is 'r' to read the file, or 'w' to write it -- just like | |
53 | for the built-in function open() or for os.popen(). | |
54 | ||
55 | To create a new template object initialized to a given one: | |
56 | t2 = t.clone() | |
57 | ||
58 | For an example, see the function test() at the end of the file. | |
59 | """ # ' | |
60 | ||
61 | ||
62 | import re | |
63 | ||
64 | import os | |
65 | import tempfile | |
66 | import string | |
67 | ||
# Names exported by a star-import of this module: only the Template class.
__all__ = ["Template"]
69 | ||
# Conversion step kinds
#
# Each kind is a two-character string: the first character describes how
# the step gets its input, the second how it delivers its output.
#   'f'  the step needs a real file (passed to the command as $IN / $OUT)
#   '-'  the step uses standard input / standard output
#   '.'  the step has no such end (a SOURCE has no input, a SINK no output)

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT = '-f'                    # Must write a real file
FILEIN_STDOUT = 'f-'                    # Must read a real file
STDIN_STDOUT = '--'                     # Normal pipeline element
SOURCE = '.-'                           # Must be first, writes stdout
SINK = '-.'                             # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]
81 | ||
82 | ||
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of conversion steps in self.steps,
    each a (cmd, kind) tuple, plus a debugging flag.  The steps are turned
    into a /bin/sh command line by the module-level makepipeline()
    function whenever the template is used to open or copy a file.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state.

        Only the list of steps is cleared; the debugging flag is kept.
        """
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with the same
        steps and debugging flag as the current one.

        The list of steps is copied, so adding steps to the clone does
        not affect the original (and vice versa).
        """
        t = Template()
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        cmd is the shell command (a string); kind is one of the strings
        listed in stepkinds.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SOURCE, if the template already ends with
        a SINK step, or if kind asks for an input/output file but cmd
        does not mention $IN/$OUT.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        # A step that needs real files must refer to them by name.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        cmd is the shell command (a string); kind is one of the strings
        listed in stepkinds.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SINK, if the template already begins with
        a SOURCE step, or if kind asks for an input/output file but cmd
        does not mention $IN/$OUT.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        # A step that needs real files must refer to them by name.
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; any other value raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is simply opened for reading.  Otherwise
        the pipeline is started with file as its input and the returned
        pipe delivers the pipeline's standard output.  Raises ValueError
        if the last step is a SINK, since it produces no output to read.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is simply opened for writing.  Otherwise
        the pipeline is started with file as its output and the returned
        pipe feeds the pipeline's standard input.  Raises ValueError if
        the first step is a SOURCE, since it accepts no input.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for the command."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        for this template, built by the module-level makepipeline().

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            # Call form prints the same text under Python 2 and 3.
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
194 | ||
195 | ||
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command that runs steps from infile to outfile.

    steps is a sequence of (cmd, kind) pairs; infile and outfile are
    file names, or '' to use standard input / standard output.

    Side effect: every temporary file needed to connect two stages is
    created right away with tempfile.mkstemp().  The returned command
    removes those files itself, both at its end and from a signal trap.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a stage that needs a real
    # file but is fed from (or feeds) a standard stream gets a 'cat'
    # stage next to it, so a temporary file is invented below.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for i in range(1, len(stages)):
        left = stages[i-1]
        right = stages[i]
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)        # only the name is needed; the shell reopens it
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Attach each stage's file names: as $IN/$OUT assignments for stages
    # that want real files, as redirections for stages that use streams.
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file is piped to from its
    # predecessor; a stage with one starts a new command line.
    #
    cmdlist = stages[0][1]
    for inf, cmd, kind, _ in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group the variable assignment with its command so the
                # pipe applies to both.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporary files when done, and also on signals.
    #
    if garbage:
        rmcmd = 'rm -f' + ''.join([' ' + quote(name) for name in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist
263 | ||
264 | ||
# Reliably quote a string as a single argument for /bin/sh

_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./' # Safe unquoted
_funnychars = '"`$\\' # Unsafe inside "double quotes"

def quote(file):
    """Return file quoted so that /bin/sh reads it as a single argument.

    A string made only of characters in _safechars is returned as is.
    A string without a single quote is wrapped in single quotes.  Any
    other string is wrapped in double quotes, with every character in
    _funnychars escaped by a backslash.  The empty string is returned
    as '' (two single quotes) so that it still forms an argument.
    """
    if file == '':
        # Returned bare, an empty string would vanish from the command.
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    # Single quotes cannot be escaped inside single quotes, so fall back
    # to double quotes and escape what is still special in there.
    pieces = []
    for c in file:
        if c in _funnychars:
            pieces.append('\\')
        pieces.append(c)
    return '"' + ''.join(pieces) + '"'
284 | ||
285 | ||
286 | # Small test program and example | |
287 | ||
def test():
    """Small example: build a four-step template and run it once.

    The template mixes steps that need real files with steps that use
    standard streams.  Debugging is switched on, so the generated shell
    command is printed before t.copy() runs it.  The commands and the
    input path are examples only and need not exist on this system.
    """
    # Call form prints the same text under Python 2 and 3.
    print('Testing...')
    t = Template()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')