Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | .\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLTHRTUT 1" | |
132 | .TH PERLTHRTUT 1 "2002-06-08" "perl v5.8.0" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlthrtut \- tutorial on threads in Perl | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | \&\fB\s-1NOTE\s0\fR: this tutorial describes the new Perl threading flavour | |
138 | introduced in Perl 5.6.0 called interpreter threads, or \fBithreads\fR | |
139 | for short. In this model each thread runs in its own Perl interpreter, | |
140 | and any data sharing between threads must be explicit. | |
141 | .PP | |
142 | There is another older Perl threading flavour called the 5.005 model, | |
143 | unsurprisingly for 5.005 versions of Perl. The old model is known to | |
144 | have problems, is deprecated, and will probably be removed around release | |
145 | 5.10. You are strongly encouraged to migrate any existing 5.005 | |
146 | threads code to the new model as soon as possible. | |
147 | .PP | |
148 | You can see which threading flavour you have (if any) by | |
149 | running \f(CW\*(C`perl \-V\*(C'\fR and looking at the \f(CW\*(C`Platform\*(C'\fR section. | |
150 | If you have \f(CW\*(C`useithreads=define\*(C'\fR you have ithreads, if you | |
151 | have \f(CW\*(C`use5005threads=define\*(C'\fR you have 5.005 threads. | |
152 | If you have neither, you don't have any thread support built in. | |
153 | If you have both, you are in trouble. | |
154 | .PP | |
155 | The user-level interface to the 5.005 threads was via the Threads | |
156 | class, while ithreads uses the threads class. Note the change in case. | |
157 | .SH "Status" | |
158 | .IX Header "Status" | |
159 | The ithreads code has been available since Perl 5.6.0, and is considered | |
160 | stable. The user-level interface to ithreads (the threads classes) | |
161 | appeared in the 5.8.0 release, and as of this time is considered stable | |
162 | although it should be treated with caution as with all new features. | |
163 | .SH "What Is A Thread Anyway?" | |
164 | .IX Header "What Is A Thread Anyway?" | |
165 | A thread is a flow of control through a program with a single | |
166 | execution point. | |
167 | .PP | |
168 | Sounds an awful lot like a process, doesn't it? Well, it should. | |
169 | Threads are one of the pieces of a process. Every process has at least | |
170 | one thread and, up until now, every process running Perl had only one | |
171 | thread. With 5.8, though, you can create extra threads. We're going | |
172 | to show you how, when, and why. | |
173 | .SH "Threaded Program Models" | |
174 | .IX Header "Threaded Program Models" | |
175 | There are three basic ways that you can structure a threaded | |
176 | program. Which model you choose depends on what you need your program | |
177 | to do. For many non-trivial threaded programs you'll need to choose | |
178 | different models for different pieces of your program. | |
179 | .Sh "Boss/Worker" | |
180 | .IX Subsection "Boss/Worker" | |
181 | The boss/worker model usually has one `boss' thread and one or more | |
182 | `worker' threads. The boss thread gathers or generates tasks that need | |
183 | to be done, then parcels those tasks out to the appropriate worker | |
184 | thread. | |
185 | .PP | |
186 | This model is common in \s-1GUI\s0 and server programs, where a main thread | |
187 | waits for some event and then passes that event to the appropriate | |
188 | worker threads for processing. Once the event has been passed on, the | |
189 | boss thread goes back to waiting for another event. | |
190 | .PP | |
191 | The boss thread does relatively little work. While tasks aren't | |
192 | necessarily performed faster than with any other method, it tends to | |
193 | have the best user-response times. | |
194 | .Sh "Work Crew" | |
195 | .IX Subsection "Work Crew" | |
196 | In the work crew model, several threads are created that do | |
197 | essentially the same thing to different pieces of data. It closely | |
198 | mirrors classical parallel processing and vector processors, where a | |
199 | large array of processors do the exact same thing to many pieces of | |
200 | data. | |
201 | .PP | |
202 | This model is particularly useful if the system running the program | |
203 | will distribute multiple threads across different processors. It can | |
204 | also be useful in ray tracing or rendering engines, where the | |
205 | individual threads can pass on interim results to give the user visual | |
206 | feedback. | |
207 | .Sh "Pipeline" | |
208 | .IX Subsection "Pipeline" | |
209 | The pipeline model divides up a task into a series of steps, and | |
210 | passes the results of one step on to the thread processing the | |
211 | next. Each thread does one thing to each piece of data and passes the | |
212 | results to the next thread in line. | |
213 | .PP | |
214 | This model makes the most sense if you have multiple processors so two | |
215 | or more threads will be executing in parallel, though it can often | |
216 | make sense in other contexts as well. It tends to keep the individual | |
217 | tasks small and simple, as well as allowing some parts of the pipeline | |
218 | to block (on I/O or system calls, for example) while other parts keep | |
219 | going. If you're running different parts of the pipeline on different | |
220 | processors you may also take advantage of the caches on each | |
221 | processor. | |
222 | .PP | |
223 | This model is also handy for a form of recursive programming where, | |
224 | rather than having a subroutine call itself, it instead creates | |
225 | another thread. Prime and Fibonacci generators both map well to this | |
226 | form of the pipeline model. (A version of a prime number generator is | |
227 | presented later on.) | |
228 | .SH "Native threads" | |
229 | .IX Header "Native threads" | |
230 | There are several different ways to implement threads on a system. How | |
231 | threads are implemented depends both on the vendor and, in some cases, | |
232 | the version of the operating system. Often the first implementation | |
233 | will be relatively simple, but later versions of the \s-1OS\s0 will be more | |
234 | sophisticated. | |
235 | .PP | |
236 | While the information in this section is useful, it's not necessary, | |
237 | so you can skip it if you don't feel up to it. | |
238 | .PP | |
239 | There are three basic categories of threads: user-mode threads, kernel | |
240 | threads, and multiprocessor kernel threads. | |
241 | .PP | |
242 | User-mode threads are threads that live entirely within a program and | |
243 | its libraries. In this model, the \s-1OS\s0 knows nothing about threads. As | |
244 | far as it's concerned, your process is just a process. | |
245 | .PP | |
246 | This is the easiest way to implement threads, and the way most OSes | |
247 | start. The big disadvantage is that, since the \s-1OS\s0 knows nothing about | |
248 | threads, if one thread blocks they all do. Typical blocking activities | |
249 | include most system calls, most I/O, and things like \fIsleep()\fR. | |
250 | .PP | |
251 | Kernel threads are the next step in thread evolution. The \s-1OS\s0 knows | |
252 | about kernel threads, and makes allowances for them. The main | |
253 | difference between a kernel thread and a user-mode thread is | |
254 | blocking. With kernel threads, things that block a single thread don't | |
255 | block other threads. This is not the case with user-mode threads, | |
256 | where the kernel blocks at the process level and not the thread level. | |
257 | .PP | |
258 | This is a big step forward, and can give a threaded program quite a | |
259 | performance boost over non-threaded programs. Threads that block | |
260 | performing I/O, for example, won't block threads that are doing other | |
261 | things. Each process still has only one thread running at once, | |
262 | though, regardless of how many CPUs a system might have. | |
263 | .PP | |
264 | Since kernel threading can interrupt a thread at any time, it will | |
265 | uncover some of the implicit locking assumptions you may make in your | |
266 | program. For example, something as simple as \f(CW\*(C`$a = $a + 2\*(C'\fR can behave | |
267 | unpredictably with kernel threads if \f(CW$a\fR is visible to other | |
268 | threads, as another thread may have changed \f(CW$a\fR between the time it | |
269 | was fetched on the right hand side and the time the new value is | |
270 | stored. | |
271 | .PP | |
272 | Multiprocessor kernel threads are the final step in thread | |
273 | support. With multiprocessor kernel threads on a machine with multiple | |
274 | CPUs, the \s-1OS\s0 may schedule two or more threads to run simultaneously on | |
275 | different CPUs. | |
276 | .PP | |
277 | This can give a serious performance boost to your threaded program, | |
278 | since more than one thread will be executing at the same time. As a | |
279 | tradeoff, though, any of those nagging synchronization issues that | |
280 | might not have shown with basic kernel threads will appear with a | |
281 | vengeance. | |
282 | .PP | |
283 | In addition to the different levels of \s-1OS\s0 involvement in threads, | |
284 | different OSes (and different thread implementations for a particular | |
285 | \&\s-1OS\s0) allocate \s-1CPU\s0 cycles to threads in different ways. | |
286 | .PP | |
287 | Cooperative multitasking systems have running threads give up control | |
288 | if one of two things happens. If a thread calls a yield function, it | |
289 | gives up control. It also gives up control if the thread does | |
290 | something that would cause it to block, such as perform I/O. In a | |
291 | cooperative multitasking implementation, one thread can starve all the | |
292 | others for \s-1CPU\s0 time if it so chooses. | |
293 | .PP | |
294 | Preemptive multitasking systems interrupt threads at regular intervals | |
295 | while the system decides which thread should run next. In a preemptive | |
296 | multitasking system, one thread usually won't monopolize the \s-1CPU\s0. | |
297 | .PP | |
298 | On some systems, there can be cooperative and preemptive threads | |
299 | running simultaneously. (Threads running with realtime priorities | |
300 | often behave cooperatively, for example, while threads running at | |
301 | normal priorities behave preemptively.) | |
302 | .SH "What kind of threads are Perl threads?" | |
303 | .IX Header "What kind of threads are Perl threads?" | |
304 | If you have experience with other thread implementations, you might | |
305 | find that things aren't quite what you expect. It's very important to | |
306 | remember when dealing with Perl threads that Perl Threads Are Not X | |
307 | Threads, for all values of X. They aren't \s-1POSIX\s0 threads, or | |
308 | DecThreads, or Java's Green threads, or Win32 threads. There are | |
309 | similarities, and the broad concepts are the same, but if you start | |
310 | looking for implementation details you're going to be either | |
311 | disappointed or confused. Possibly both. | |
312 | .PP | |
313 | This is not to say that Perl threads are completely different from | |
314 | everything that's ever come before\*(--they're not. Perl's threading | |
315 | model owes a lot to other thread models, especially \s-1POSIX\s0. Just as | |
316 | Perl is not C, though, Perl threads are not \s-1POSIX\s0 threads. So if you | |
317 | find yourself looking for mutexes, or thread priorities, it's time to | |
318 | step back a bit and think about what you want to do and how Perl can | |
319 | do it. | |
320 | .PP | |
321 | However it is important to remember that Perl threads cannot magically | |
322 | do things unless your operating system's threads allow it. So if your | |
323 | system blocks the entire process on \fIsleep()\fR, Perl usually will as well. | |
324 | .PP | |
325 | Perl Threads Are Different. | |
326 | .SH "Thread-Safe Modules" | |
327 | .IX Header "Thread-Safe Modules" | |
328 | The addition of threads has changed Perl's internals | |
329 | substantially. There are implications for people who write | |
330 | modules with \s-1XS\s0 code or external libraries. However, since perl data is | |
331 | not shared among threads by default, Perl modules stand a high chance of | |
332 | being thread-safe or can be made thread-safe easily. Modules that are not | |
333 | tagged as thread-safe should be tested or code reviewed before being used | |
334 | in production code. | |
335 | .PP | |
336 | Not all modules that you might use are thread\-safe, and you should | |
337 | always assume a module is unsafe unless the documentation says | |
338 | otherwise. This includes modules that are distributed as part of the | |
339 | core. Threads are a new feature, and even some of the standard | |
340 | modules aren't thread\-safe. | |
341 | .PP | |
342 | Even if a module is thread\-safe, it doesn't mean that the module is optimized | |
343 | to work well with threads. A module could possibly be rewritten to utilize | |
344 | the new features in threaded Perl to increase performance in a threaded | |
345 | environment. | |
346 | .PP | |
347 | If you're using a module that's not thread-safe for some reason, you | |
348 | can protect yourself by using it from one, and only one, thread. | |
349 | If you need multiple threads to access such a module, you can use semaphores and | |
350 | lots of programming discipline to control access to it. Semaphores | |
351 | are covered in \*(L"Basic semaphores\*(R". | |
352 | .PP | |
353 | See also \*(L"Thread\-Safety of System Libraries\*(R". | |
354 | .SH "Thread Basics" | |
355 | .IX Header "Thread Basics" | |
356 | The core threads module provides the basic functions you need to write | |
357 | threaded programs. In the following sections we'll cover the basics, | |
358 | showing you what you need to do to create a threaded program. After | |
359 | that, we'll go over some of the features of the threads module that | |
360 | make threaded programming easier. | |
361 | .Sh "Basic Thread Support" | |
362 | .IX Subsection "Basic Thread Support" | |
363 | Thread support is a Perl compile-time option \- it's something that's | |
364 | turned on or off when Perl is built at your site, rather than when | |
365 | your programs are compiled. If your Perl wasn't compiled with thread | |
366 | support enabled, then any attempt to use threads will fail. | |
367 | .PP | |
368 | Your programs can use the Config module to check whether threads are | |
369 | enabled. If your program can't run without them, you can say something | |
370 | like: | |
371 | .PP | |
372 | .Vb 1 | |
373 | \& $Config{useithreads} or die "Recompile Perl with threads to run this program."; | |
374 | .Ve | |
375 | .PP | |
376 | A possibly-threaded program using a possibly-threaded module might | |
377 | have code like this: | |
378 | .PP | |
379 | .Vb 2 | |
380 | \& use Config; | |
381 | \& use MyMod; | |
382 | .Ve | |
383 | .PP | |
384 | .Vb 10 | |
385 | \& BEGIN { | |
386 | \& if ($Config{useithreads}) { | |
387 | \& # We have threads | |
388 | \& require MyMod_threaded; | |
389 | \& import MyMod_threaded; | |
390 | \& } else { | |
391 | \& require MyMod_unthreaded; | |
392 | \& import MyMod_unthreaded; | |
393 | \& } | |
394 | \& } | |
395 | .Ve | |
396 | .PP | |
397 | Since code that runs both with and without threads is usually pretty | |
398 | messy, it's best to isolate the thread-specific code in its own | |
399 | module. In our example above, that's what MyMod_threaded is, and it's | |
400 | only imported if we're running on a threaded Perl. | |
401 | .Sh "A Note about the Examples" | |
402 | .IX Subsection "A Note about the Examples" | |
403 | Although thread support is considered to be stable, there are still a number | |
404 | of quirks that may startle you when you try out any of the examples below. | |
405 | In a real situation, care should be taken that all threads are finished | |
406 | executing before the program exits. That care has \fBnot\fR been taken in these | |
407 | examples in the interest of simplicity. Running these examples \*(L"as is\*(R" will | |
408 | produce error messages, usually caused by the fact that there are still | |
409 | threads running when the program exits. You should not be alarmed by this. | |
410 | Future versions of Perl may fix this problem. | |
411 | .Sh "Creating Threads" | |
412 | .IX Subsection "Creating Threads" | |
413 | The threads package provides the tools you need to create new | |
414 | threads. Like any other module, you need to tell Perl that you want to use | |
415 | it; \f(CW\*(C`use threads\*(C'\fR imports all the pieces you need to create basic | |
416 | threads. | |
417 | .PP | |
418 | The simplest, most straightforward way to create a thread is with \fInew()\fR: | |
419 | .PP | |
420 | .Vb 1 | |
421 | \& use threads; | |
422 | .Ve | |
423 | .PP | |
424 | .Vb 1 | |
425 | \& $thr = threads->new(\e&sub1); | |
426 | .Ve | |
427 | .PP | |
428 | .Vb 3 | |
429 | \& sub sub1 { | |
430 | \& print "In the thread\en"; | |
431 | \& } | |
432 | .Ve | |
433 | .PP | |
434 | The \fInew()\fR method takes a reference to a subroutine and creates a new | |
435 | thread, which starts executing in the referenced subroutine. Control | |
436 | then passes both to the subroutine and the caller. | |
437 | .PP | |
438 | If you need to, your program can pass parameters to the subroutine as | |
439 | part of the thread startup. Just include the list of parameters as | |
440 | part of the \f(CW\*(C`threads::new\*(C'\fR call, like this: | |
441 | .PP | |
442 | .Vb 1 | |
443 | \& use threads; | |
444 | .Ve | |
445 | .PP | |
446 | .Vb 4 | |
447 | \& $Param3 = "foo"; | |
448 | \& $thr = threads->new(\e&sub1, "Param 1", "Param 2", $Param3); | |
449 | \& $thr = threads->new(\e&sub1, @ParamList); | |
450 | \& $thr = threads->new(\e&sub1, qw(Param1 Param2 Param3)); | |
451 | .Ve | |
452 | .PP | |
453 | .Vb 5 | |
454 | \& sub sub1 { | |
455 | \& my @InboundParameters = @_; | |
456 | \& print "In the thread\en"; | |
457 | \& print "got parameters >", join("<>", @InboundParameters), "<\en"; | |
458 | \& } | |
459 | .Ve | |
460 | .PP | |
461 | The last example illustrates another feature of threads. You can spawn | |
462 | off several threads using the same subroutine. Each thread executes | |
463 | the same subroutine, but in a separate thread with a separate | |
464 | environment and potentially separate arguments. | |
465 | .PP | |
466 | \&\f(CW\*(C`create()\*(C'\fR is a synonym for \f(CW\*(C`new()\*(C'\fR. | |
467 | .Sh "Giving up control" | |
468 | .IX Subsection "Giving up control" | |
469 | There are times when you may find it useful to have a thread | |
470 | explicitly give up the \s-1CPU\s0 to another thread. Your threading package | |
471 | might not support preemptive multitasking for threads, for example, or | |
472 | you may be doing something processor-intensive and want to make sure | |
473 | that the user-interface thread gets called frequently. Regardless, | |
474 | there are times that you might want a thread to give up the processor. | |
475 | .PP | |
476 | Perl's threading package provides the \fIyield()\fR function that does | |
477 | this. \fIyield()\fR is pretty straightforward, and works like this: | |
478 | .PP | |
479 | .Vb 1 | |
480 | \& use threads; | |
481 | .Ve | |
482 | .PP | |
483 | .Vb 8 | |
484 | \& sub loop { | |
485 | \& my $thread = shift; | |
486 | \& my $foo = 50; | |
487 | \& while($foo--) { print "in thread $thread\en" } | |
488 | \& threads->yield; | |
489 | \& $foo = 50; | |
490 | \& while($foo--) { print "in thread $thread\en" } | |
491 | \& } | |
492 | .Ve | |
493 | .PP | |
494 | .Vb 3 | |
495 | \& my $thread1 = threads->new(\e&loop, 'first'); | |
496 | \& my $thread2 = threads->new(\e&loop, 'second'); | |
497 | \& my $thread3 = threads->new(\e&loop, 'third'); | |
498 | .Ve | |
499 | .PP | |
500 | It is important to remember that \fIyield()\fR is only a hint to give up the \s-1CPU\s0; | |
501 | what actually happens depends on your hardware, \s-1OS\s0 and threading libraries. | |
502 | Therefore it is important to note that one should not build the scheduling of | |
503 | the threads around \fIyield()\fR calls. It might work on your platform but it won't | |
504 | necessarily work on another platform. | |
505 | .Sh "Waiting For A Thread To Exit" | |
506 | .IX Subsection "Waiting For A Thread To Exit" | |
507 | Since threads are also subroutines, they can return values. To wait | |
508 | for a thread to exit and extract any values it might return, you can | |
509 | use the \fIjoin()\fR method: | |
510 | .PP | |
511 | .Vb 1 | |
512 | \& use threads; | |
513 | .Ve | |
514 | .PP | |
515 | .Vb 1 | |
516 | \& $thr = threads->new(\e&sub1); | |
517 | .Ve | |
518 | .PP | |
519 | .Vb 2 | |
520 | \& @ReturnData = $thr->join; | |
521 | \& print "Thread returned @ReturnData"; | |
522 | .Ve | |
523 | .PP | |
524 | .Vb 1 | |
525 | \& sub sub1 { return "Fifty-six", "foo", 2; } | |
526 | .Ve | |
527 | .PP | |
528 | In the example above, the \fIjoin()\fR method returns as soon as the thread | |
529 | ends. In addition to waiting for a thread to finish and gathering up | |
530 | any values that the thread might have returned, \fIjoin()\fR also performs | |
531 | any \s-1OS\s0 cleanup necessary for the thread. That cleanup might be | |
532 | important, especially for long-running programs that spawn lots of | |
533 | threads. If you don't want the return values and don't want to wait | |
534 | for the thread to finish, you should call the \fIdetach()\fR method | |
535 | instead, as described next. | |
536 | .Sh "Ignoring A Thread" | |
537 | .IX Subsection "Ignoring A Thread" | |
538 | \&\fIjoin()\fR does three things: it waits for a thread to exit, cleans up | |
539 | after it, and returns any data the thread may have produced. But what | |
540 | if you're not interested in the thread's return values, and you don't | |
541 | really care when the thread finishes? All you want is for the thread | |
542 | to get cleaned up when it's done. | |
543 | .PP | |
544 | In this case, you use the \fIdetach()\fR method. Once a thread is detached, | |
545 | it'll run until it's finished, then Perl will clean up after it | |
546 | automatically. | |
547 | .PP | |
548 | .Vb 1 | |
549 | \& use threads; | |
550 | .Ve | |
551 | .PP | |
552 | .Vb 1 | |
553 | \& $thr = threads->new(\e&sub1); # Spawn the thread | |
554 | .Ve | |
555 | .PP | |
556 | .Vb 1 | |
557 | \& $thr->detach; # Now we officially don't care any more | |
558 | .Ve | |
559 | .PP | |
560 | .Vb 8 | |
561 | \& sub sub1 { | |
562 | \& $a = 0; | |
563 | \& while (1) { | |
564 | \& $a++; | |
565 | \& print "\e$a is $a\en"; | |
566 | \& sleep 1; | |
567 | \& } | |
568 | \& } | |
569 | .Ve | |
570 | .PP | |
571 | Once a thread is detached, it may not be joined, and any return data | |
572 | that it might have produced (if it was done and waiting for a join) is | |
573 | lost. | |
574 | .SH "Threads And Data" | |
575 | .IX Header "Threads And Data" | |
576 | Now that we've covered the basics of threads, it's time for our next | |
577 | topic: data. Threading introduces a couple of complications to data | |
578 | access that non-threaded programs never need to worry about. | |
579 | .Sh "Shared And Unshared Data" | |
580 | .IX Subsection "Shared And Unshared Data" | |
581 | The biggest difference between Perl ithreads and the old 5.005 style | |
582 | threading, or for that matter, most other threading systems out there, | |
583 | is that by default, no data is shared. When a new perl thread is created, | |
584 | all the data associated with the current thread is copied to the new | |
585 | thread, and is subsequently private to that new thread! | |
586 | This is similar in feel to what happens when a \s-1UNIX\s0 process forks, | |
587 | except that in this case, the data is just copied to a different part of | |
588 | memory within the same process rather than a real fork taking place. | |
589 | .PP | |
590 | To make use of threading however, one usually wants the threads to share | |
591 | at least some data between themselves. This is done with the | |
592 | threads::shared module and the \f(CW\*(C` : shared\*(C'\fR attribute: | |
593 | .PP | |
594 | .Vb 2 | |
595 | \& use threads; | |
596 | \& use threads::shared; | |
597 | .Ve | |
598 | .PP | |
599 | .Vb 3 | |
600 | \& my $foo : shared = 1; | |
601 | \& my $bar = 1; | |
602 | \& threads->new(sub { $foo++; $bar++ })->join; | |
603 | .Ve | |
604 | .PP | |
605 | .Vb 2 | |
606 | \& print "$foo\en"; #prints 2 since $foo is shared | |
607 | \& print "$bar\en"; #prints 1 since $bar is not shared | |
608 | .Ve | |
609 | .PP | |
610 | In the case of a shared array, all the array's elements are shared, and for | |
611 | a shared hash, all the keys and values are shared. This places | |
612 | restrictions on what may be assigned to shared array and hash elements: only | |
613 | simple values or references to shared variables are allowed \- this is | |
614 | so that a private variable can't accidentally become shared. A bad | |
615 | assignment will cause the thread to die. For example: | |
616 | .PP | |
617 | .Vb 2 | |
618 | \& use threads; | |
619 | \& use threads::shared; | |
620 | .Ve | |
621 | .PP | |
622 | .Vb 3 | |
623 | \& my $var = 1; | |
624 | \& my $svar : shared = 2; | |
625 | \& my %hash : shared; | |
626 | .Ve | |
627 | .PP | |
628 | .Vb 1 | |
629 | \& ... create some threads ... | |
630 | .Ve | |
631 | .PP | |
632 | .Vb 6 | |
633 | \& $hash{a} = 1; # all threads see exists($hash{a}) and $hash{a} == 1 | |
634 | \& $hash{a} = $var; # okay - copy-by-value: same effect as previous | |
635 | \& $hash{a} = $svar; # okay - copy-by-value: same effect as previous | |
636 | \& $hash{a} = \e$svar; # okay - a reference to a shared variable | |
637 | \& $hash{a} = \e$var; # This will die | |
638 | \& delete $hash{a}; # okay - all threads will see !exists($hash{a}) | |
639 | .Ve | |
640 | .PP | |
641 | Note that a shared variable guarantees that if two or more threads try to | |
642 | modify it at the same time, the internal state of the variable will not | |
643 | become corrupted. However, there are no guarantees beyond this, as | |
644 | explained in the next section. | |
645 | .Sh "Thread Pitfalls: Races" | |
646 | .IX Subsection "Thread Pitfalls: Races" | |
647 | While threads bring a new set of useful tools, they also bring a | |
648 | number of pitfalls. One pitfall is the race condition: | |
649 | .PP | |
650 | .Vb 2 | |
651 | \& use threads; | |
652 | \& use threads::shared; | |
653 | .Ve | |
654 | .PP | |
655 | .Vb 3 | |
656 | \& my $a : shared = 1; | |
657 | \& $thr1 = threads->new(\e&sub1); | |
658 | \& $thr2 = threads->new(\e&sub2); | |
659 | .Ve | |
660 | .PP | |
661 | .Vb 3 | |
662 | \& $thr1->join; | |
663 | \& $thr2->join; | |
664 | \& print "$a\en"; | |
665 | .Ve | |
666 | .PP | |
667 | .Vb 2 | |
668 | \& sub sub1 { my $foo = $a; $a = $foo + 1; } | |
669 | \& sub sub2 { my $bar = $a; $a = $bar + 1; } | |
670 | .Ve | |
671 | .PP | |
672 | What do you think \f(CW$a\fR will be? The answer, unfortunately, is \*(L"it | |
673 | depends.\*(R" Both \fIsub1()\fR and \fIsub2()\fR access the global variable \f(CW$a\fR, once | |
674 | to read and once to write. Depending on factors ranging from your | |
675 | thread implementation's scheduling algorithm to the phase of the moon, | |
676 | \&\f(CW$a\fR can be 2 or 3. | |
677 | .PP | |
678 | Race conditions are caused by unsynchronized access to shared | |
679 | data. Without explicit synchronization, there's no way to be sure that | |
680 | nothing has happened to the shared data between the time you access it | |
681 | and the time you update it. Even this simple code fragment has the | |
682 | possibility of error: | |
683 | .PP | |
684 | .Vb 8 | |
685 | \& use threads; | |
686 | \& my $a : shared = 2; | |
687 | \& my $b : shared; | |
688 | \& my $c : shared; | |
689 | \& my $thr1 = threads->create(sub { $b = $a; $a = $b + 1; }); | |
690 | \& my $thr2 = threads->create(sub { $c = $a; $a = $c + 1; }); | |
691 | \& $thr1->join; | |
692 | \& $thr2->join; | |
693 | .Ve | |
694 | .PP | |
695 | Two threads both access \f(CW$a\fR. Each thread can potentially be interrupted | |
696 | at any point, or be executed in any order. At the end, \f(CW$a\fR could be 3 | |
697 | or 4, and both \f(CW$b\fR and \f(CW$c\fR could be 2 or 3. | |
698 | .PP | |
699 | Even \f(CW\*(C`$a += 5\*(C'\fR or \f(CW\*(C`$a++\*(C'\fR are not guaranteed to be atomic. | |
700 | .PP | |
701 | Whenever your program accesses data or resources that can be accessed | |
702 | by other threads, you must take steps to coordinate access or risk | |
703 | data inconsistency and race conditions. Note that Perl will protect its | |
704 | internals from your race conditions, but it won't protect you from you. | |
705 | .SH "Synchronization and control" | |
706 | .IX Header "Synchronization and control" | |
707 | Perl provides a number of mechanisms to coordinate the interactions | |
708 | between threads and their data, to avoid race conditions and the like. | |
709 | Some of these are designed to resemble the common techniques used in thread | |
710 | libraries such as \f(CW\*(C`pthreads\*(C'\fR; others are Perl\-specific. Often, the | |
711 | standard techniques are clumsy and difficult to get right (such as | |
712 | condition waits). Where possible, it is usually easier to use Perlish | |
713 | techniques such as queues, which remove some of the hard work involved. | |
714 | .Sh "Controlling access: \fIlock()\fP" | |
715 | .IX Subsection "Controlling access: lock()" | |
716 | The \fIlock()\fR function takes a shared variable and puts a lock on it. | |
717 | No other thread may lock the variable until the variable is unlocked | |
718 | by the thread holding the lock. Unlocking happens automatically | |
719 | when the locking thread exits the outermost block that contains the | |
720 | \&\f(CW\*(C`lock()\*(C'\fR function. Using \fIlock()\fR is straightforward: this example has | |
721 | several threads doing some calculations in parallel, and occasionally | |
722 | updating a running total: | |
723 | .PP | |
724 | .Vb 2 | |
725 | \& use threads; | |
726 | \& use threads::shared; | |
727 | .Ve | |
728 | .PP | |
729 | .Vb 1 | |
730 | \& my $total : shared = 0; | |
731 | .Ve | |
732 | .PP | |
733 | .Vb 11 | |
734 | \& sub calc { | |
735 | \& for (;;) { | |
736 | \& my $result; | |
737 | \& # (... do some calculations and set $result ...) | |
738 | \& { | |
739 | \& lock($total); # block until we obtain the lock | |
740 | \& $total += $result; | |
741 | \& } # lock implicitly released at end of scope | |
742 | \& last if $result == 0; | |
743 | \& } | |
744 | \& } | |
745 | .Ve | |
746 | .PP | |
747 | .Vb 7 | |
748 | \& my $thr1 = threads->new(\e&calc); | |
749 | \& my $thr2 = threads->new(\e&calc); | |
750 | \& my $thr3 = threads->new(\e&calc); | |
751 | \& $thr1->join; | |
752 | \& $thr2->join; | |
753 | \& $thr3->join; | |
754 | \& print "total=$total\en"; | |
755 | .Ve | |
756 | .PP | |
757 | \&\fIlock()\fR blocks the thread until the variable being locked is | |
758 | available. When \fIlock()\fR returns, your thread can be sure that no other | |
759 | thread can lock that variable until the outermost block containing the | |
760 | lock exits. | |
761 | .PP | |
762 | It's important to note that locks don't prevent access to the variable | |
763 | in question, only lock attempts. This is in keeping with Perl's | |
764 | longstanding tradition of courteous programming, and the advisory file | |
765 | locking that \fIflock()\fR gives you. | |
766 | .PP | |
767 | You may lock arrays and hashes as well as scalars. Locking an array, | |
768 | though, will not block subsequent locks on array elements, just lock | |
769 | attempts on the array itself. | |
770 | .PP | |
771 | Locks are recursive, which means it's okay for a thread to | |
772 | lock a variable more than once. The lock will last until the outermost | |
773 | \&\fIlock()\fR on the variable goes out of scope. For example: | |
774 | .PP | |
775 | .Vb 2 | |
776 | \& my $x : shared; | |
777 | \& doit(); | |
778 | .Ve | |
779 | .PP | |
780 | .Vb 15 | |
781 | \& sub doit { | |
782 | \& { | |
783 | \& { | |
784 | \& lock($x); # wait for lock | |
785 | \& lock($x); # NOOP - we already have the lock | |
786 | \& { | |
787 | \& lock($x); # NOOP | |
788 | \& { | |
789 | \& lock($x); # NOOP | |
790 | \& lockit_some_more(); | |
791 | \& } | |
792 | \& } | |
793 | \& } # *** implicit unlock here *** | |
794 | \& } | |
795 | \& } | |
796 | .Ve | |
797 | .PP | |
798 | .Vb 3 | |
799 | \& sub lockit_some_more { | |
800 | \& lock($x); # NOOP | |
801 | \& } # nothing happens here | |
802 | .Ve | |
803 | .PP | |
804 | Note that there is no \fIunlock()\fR function \- the only way to unlock a | |
805 | variable is to allow it to go out of scope. | |
806 | .PP | |
807 | A lock can either be used to guard the data contained within the variable | |
808 | being locked, or it can be used to guard something else, like a section | |
809 | of code. In this latter case, the variable in question does not hold any | |
810 | useful data, and exists only for the purpose of being locked. In this | |
811 | respect, the variable behaves like the mutexes and basic semaphores of | |
812 | traditional thread libraries. | |
813 | .Sh "A Thread Pitfall: Deadlocks" | |
814 | .IX Subsection "A Thread Pitfall: Deadlocks" | |
815 | Locks are a handy tool to synchronize access to data, and using them | |
816 | properly is the key to safe shared data. Unfortunately, locks aren't | |
817 | without their dangers, especially when multiple locks are involved. | |
818 | Consider the following code: | |
819 | .PP | |
820 | .Vb 1 | |
821 | \& use threads; | |
822 | .Ve | |
823 | .PP | |
824 | .Vb 14 | |
825 | \& my $a : shared = 4; | |
826 | \& my $b : shared = "foo"; | |
827 | \& my $thr1 = threads->new(sub { | |
828 | \& lock($a); | |
829 | \& threads->yield; | |
830 | \& sleep 20; | |
831 | \& lock($b); | |
832 | \& }); | |
833 | \& my $thr2 = threads->new(sub { | |
834 | \& lock($b); | |
835 | \& threads->yield; | |
836 | \& sleep 20; | |
837 | \& lock($a); | |
838 | \& }); | |
839 | .Ve | |
840 | .PP | |
841 | This program will probably hang until you kill it. The only way it | |
842 | won't hang is if one of the two threads acquires both locks | |
843 | first. A guaranteed-to-hang version is more complicated, but the | |
844 | principle is the same. | |
845 | .PP | |
846 | The first thread will grab a lock on \f(CW$a\fR, then, after a pause during which | |
847 | the second thread has probably had time to do some work, try to grab a | |
848 | lock on \f(CW$b\fR. Meanwhile, the second thread grabs a lock on \f(CW$b\fR, then later | |
849 | tries to grab a lock on \f(CW$a\fR. The second lock attempt for both threads will | |
850 | block, each waiting for the other to release its lock. | |
851 | .PP | |
852 | This condition is called a deadlock, and it occurs whenever two or | |
853 | more threads are trying to get locks on resources that the others | |
854 | own. Each thread will block, waiting for the other to release a lock | |
855 | on a resource. That never happens, though, since the thread with the | |
856 | resource is itself waiting for a lock to be released. | |
857 | .PP | |
858 | There are a number of ways to handle this sort of problem. The best | |
859 | way is to always have all threads acquire locks in the exact same | |
860 | order. If, for example, you lock variables \f(CW$a\fR, \f(CW$b\fR, and \f(CW$c\fR, always lock | |
861 | \&\f(CW$a\fR before \f(CW$b\fR, and \f(CW$b\fR before \f(CW$c\fR. It's also best to hold on to locks for | |
862 | as short a period of time as possible to minimize the risks of deadlock. | |
863 | .PP | |
864 | The other synchronization primitives described below can suffer from | |
865 | similar problems. | |
866 | .Sh "Queues: Passing Data Around" | |
867 | .IX Subsection "Queues: Passing Data Around" | |
868 | A queue is a special thread-safe object that lets you put data in one | |
869 | end and take it out the other without having to worry about | |
870 | synchronization issues. They're pretty straightforward, and look like | |
871 | this: | |
872 | .PP | |
873 | .Vb 2 | |
874 | \& use threads; | |
875 | \& use Thread::Queue; | |
876 | .Ve | |
877 | .PP | |
878 | .Vb 6 | |
879 | \& my $DataQueue = Thread::Queue->new; | |
880 | \& $thr = threads->new(sub { | |
881 | \& while ($DataElement = $DataQueue->dequeue) { | |
882 | \& print "Popped $DataElement off the queue\en"; | |
883 | \& } | |
884 | \& }); | |
885 | .Ve | |
886 | .PP | |
887 | .Vb 6 | |
888 | \& $DataQueue->enqueue(12); | |
889 | \& $DataQueue->enqueue("A", "B", "C"); | |
890 | \& $DataQueue->enqueue(\e$thr); | |
891 | \& sleep 10; | |
892 | \& $DataQueue->enqueue(undef); | |
893 | \& $thr->join; | |
894 | .Ve | |
895 | .PP | |
896 | You create the queue with \f(CW\*(C`new Thread::Queue\*(C'\fR. Then you can | |
897 | add lists of scalars onto the end with \fIenqueue()\fR, and pop scalars off | |
898 | the front of it with \fIdequeue()\fR. A queue has no fixed size, and can grow | |
899 | as needed to hold everything pushed on to it. | |
900 | .PP | |
901 | If a queue is empty, \fIdequeue()\fR blocks until another thread enqueues | |
902 | something. This makes queues ideal for event loops and other | |
903 | communications between threads. | |
904 | .Sh "Semaphores: Synchronizing Data Access" | |
905 | .IX Subsection "Semaphores: Synchronizing Data Access" | |
906 | Semaphores are a kind of generic locking mechanism. In their most basic | |
907 | form, they behave very much like lockable scalars, except that they | |
908 | can't hold data, and that they must be explicitly unlocked. In their | |
909 | advanced form, they act like a kind of counter, and can allow multiple | |
910 | threads to have the 'lock' at any one time. | |
911 | .Sh "Basic semaphores" | |
912 | .IX Subsection "Basic semaphores" | |
913 | Semaphores have two methods, \fIdown()\fR and \fIup()\fR: \fIdown()\fR decrements the resource | |
914 | count, while \fIup()\fR increments it. Calls to \fIdown()\fR will block if the | |
915 | semaphore's current count would decrement below zero. This program | |
916 | gives a quick demonstration: | |
917 | .PP | |
918 | .Vb 2 | |
919 | \& use threads qw(yield); | |
920 | \& use Thread::Semaphore; | |
921 | .Ve | |
922 | .PP | |
923 | .Vb 2 | |
924 | \& my $semaphore = new Thread::Semaphore; | |
925 | \& my $GlobalVariable : shared = 0; | |
926 | .Ve | |
927 | .PP | |
928 | .Vb 3 | |
929 | \& $thr1 = new threads \e&sample_sub, 1; | |
930 | \& $thr2 = new threads \e&sample_sub, 2; | |
931 | \& $thr3 = new threads \e&sample_sub, 3; | |
932 | .Ve | |
933 | .PP | |
934 | .Vb 16 | |
935 | \& sub sample_sub { | |
936 | \& my $SubNumber = shift @_; | |
937 | \& my $TryCount = 10; | |
938 | \& my $LocalCopy; | |
939 | \& sleep 1; | |
940 | \& while ($TryCount--) { | |
941 | \& $semaphore->down; | |
942 | \& $LocalCopy = $GlobalVariable; | |
943 | \& print "$TryCount tries left for sub $SubNumber (\e$GlobalVariable is $GlobalVariable)\en"; | |
944 | \& yield; | |
945 | \& sleep 2; | |
946 | \& $LocalCopy++; | |
947 | \& $GlobalVariable = $LocalCopy; | |
948 | \& $semaphore->up; | |
949 | \& } | |
950 | \& } | |
951 | .Ve | |
952 | .PP | |
953 | .Vb 3 | |
954 | \& $thr1->join; | |
955 | \& $thr2->join; | |
956 | \& $thr3->join; | |
957 | .Ve | |
958 | .PP | |
959 | The three invocations of the subroutine all operate in sync. The | |
960 | semaphore, though, makes sure that only one thread is accessing the | |
961 | global variable at once. | |
962 | .Sh "Advanced Semaphores" | |
963 | .IX Subsection "Advanced Semaphores" | |
964 | By default, semaphores behave like locks, letting only one thread | |
965 | \&\fIdown()\fR them at a time. However, there are other uses for semaphores. | |
966 | .PP | |
967 | Each semaphore has a counter attached to it. By default, semaphores are | |
968 | created with the counter set to one, \fIdown()\fR decrements the counter by | |
969 | one, and \fIup()\fR increments by one. However, we can override any or all | |
970 | of these defaults simply by passing in different values: | |
971 | .PP | |
972 | .Vb 4 | |
973 | \& use threads; | |
974 | \& use Thread::Semaphore; | |
975 | \& my $semaphore = Thread::Semaphore->new(5); | |
976 | \& # Creates a semaphore with the counter set to five | |
977 | .Ve | |
978 | .PP | |
979 | .Vb 2 | |
980 | \& $thr1 = threads->new(\e&sub1); | |
981 | \& $thr2 = threads->new(\e&sub1); | |
982 | .Ve | |
983 | .PP | |
984 | .Vb 5 | |
985 | \& sub sub1 { | |
986 | \& $semaphore->down(5); # Decrements the counter by five | |
987 | \& # Do stuff here | |
988 | \& $semaphore->up(5); # Increment the counter by five | |
989 | \& } | |
990 | .Ve | |
991 | .PP | |
992 | .Vb 2 | |
993 | \& $thr1->detach; | |
994 | \& $thr2->detach; | |
995 | .Ve | |
996 | .PP | |
997 | If \fIdown()\fR attempts to decrement the counter below zero, it blocks until | |
998 | the counter is large enough. Note that while a semaphore can be created | |
999 | with a starting count of zero, any \fIup()\fR or \fIdown()\fR always changes the | |
1000 | counter by at least one, and so \f(CW$semaphore\fR\->\fIdown\fR\|(0) is the same as | |
1001 | \&\f(CW$semaphore\fR\->\fIdown\fR\|(1). | |
1002 | .PP | |
1003 | The question, of course, is why would you do something like this? Why | |
1004 | create a semaphore with a starting count that's not one, or why | |
1005 | decrement/increment it by more than one? The answer is resource | |
1006 | availability. Many resources that you want to manage access for can be | |
1007 | safely used by more than one thread at once. | |
1008 | .PP | |
1009 | For example, let's take a \s-1GUI\s0 driven program. It has a semaphore that | |
1010 | it uses to synchronize access to the display, so only one thread is | |
1011 | ever drawing at once. Handy, but of course you don't want any thread | |
1012 | to start drawing until things are properly set up. In this case, you | |
1013 | can create a semaphore with a counter set to zero, and up it when | |
1014 | things are ready for drawing. | |
1015 | .PP | |
1016 | Semaphores with counters greater than one are also useful for | |
1017 | establishing quotas. Say, for example, that you have a number of | |
1018 | threads that can do I/O at once. You don't want all the threads | |
1019 | reading or writing at once though, since that can potentially swamp | |
1020 | your I/O channels, or deplete your process' quota of filehandles. You | |
1021 | can use a semaphore initialized to the number of concurrent I/O | |
1022 | requests (or open files) that you want at any one time, and have your | |
1023 | threads quietly block and unblock themselves. | |
1024 | .PP | |
1025 | Larger increments or decrements are handy in those cases where a | |
1026 | thread needs to check out or return a number of resources at once. | |
1027 | .Sh "\fIcond_wait()\fP and \fIcond_signal()\fP" | |
1028 | .IX Subsection "cond_wait() and cond_signal()" | |
1029 | These two functions can be used in conjunction with locks to notify | |
1030 | co-operating threads that a resource has become available. They are | |
1031 | very similar in use to the functions found in \f(CW\*(C`pthreads\*(C'\fR. However | |
1032 | for most purposes, queues are simpler to use and more intuitive. See | |
1033 | threads::shared for more details. | |
1034 | .SH "General Thread Utility Routines" | |
1035 | .IX Header "General Thread Utility Routines" | |
1036 | We've covered the workhorse parts of Perl's threading package, and | |
1037 | with these tools you should be well on your way to writing threaded | |
1038 | code and packages. There are a few useful little pieces that didn't | |
1039 | really fit in anyplace else. | |
1040 | .Sh "What Thread Am I In?" | |
1041 | .IX Subsection "What Thread Am I In?" | |
1042 | The \f(CW\*(C`threads\->self\*(C'\fR class method provides your program with a way to | |
1043 | get an object representing the thread it's currently in. You can use this | |
1044 | object in the same way as the ones returned from thread creation. | |
1045 | .Sh "Thread IDs" | |
1046 | .IX Subsection "Thread IDs" | |
1047 | \&\fItid()\fR is a thread object method that returns the thread \s-1ID\s0 of the | |
1048 | thread the object represents. Thread IDs are integers, with the main | |
1049 | thread in a program being 0. Currently Perl assigns a unique tid to | |
1050 | every thread ever created in your program, assigning the first thread | |
1051 | to be created a tid of 1, and increasing the tid by 1 for each new | |
1052 | thread that's created. | |
1053 | .Sh "Are These Threads The Same?" | |
1054 | .IX Subsection "Are These Threads The Same?" | |
1055 | The \fIequal()\fR method takes two thread objects and returns true | |
1056 | if the objects represent the same thread, and false if they don't. | |
1057 | .PP | |
1058 | Thread objects also have an overloaded == comparison so that you can do | |
1059 | comparison on them as you would with normal objects. | |
1060 | .Sh "What Threads Are Running?" | |
1061 | .IX Subsection "What Threads Are Running?" | |
1062 | \&\f(CW\*(C`threads\->list\*(C'\fR returns a list of thread objects, one for each thread | |
1063 | that's currently running and not detached. Handy for a number of things, | |
1064 | including cleaning up at the end of your program: | |
1065 | .PP | |
1066 | .Vb 7 | |
1067 | \& # Loop through all the threads | |
1068 | \& foreach $thr (threads->list) { | |
1069 | \& # Don't join the main thread or ourselves | |
1070 | \& if ($thr->tid && !threads::equal($thr, threads->self)) { | |
1071 | \& $thr->join; | |
1072 | \& } | |
1073 | \& } | |
1074 | .Ve | |
1075 | .PP | |
1076 | If some threads have not finished running when the main Perl thread | |
1077 | ends, Perl will warn you about it and die, since it is impossible for Perl | |
1078 | to clean up itself while other threads are running. | |
1079 | .SH "A Complete Example" | |
1080 | .IX Header "A Complete Example" | |
1081 | Confused yet? It's time for an example program to show some of the | |
1082 | things we've covered. This program finds prime numbers using threads. | |
1083 | .PP | |
1084 | .Vb 34 | |
1085 | \& 1 #!/usr/bin/perl -w | |
1086 | \& 2 # prime-pthread, courtesy of Tom Christiansen | |
1087 | \& 3 | |
1088 | \& 4 use strict; | |
1089 | \& 5 | |
1090 | \& 6 use threads; | |
1091 | \& 7 use Thread::Queue; | |
1092 | \& 8 | |
1093 | \& 9 my $stream = new Thread::Queue; | |
1094 | \& 10 my $kid = new threads(\e&check_num, $stream, 2); | |
1095 | \& 11 | |
1096 | \& 12 for my $i ( 3 .. 1000 ) { | |
1097 | \& 13 $stream->enqueue($i); | |
1098 | \& 14 } | |
1099 | \& 15 | |
1100 | \& 16 $stream->enqueue(undef); | |
1101 | \& 17 $kid->join; | |
1102 | \& 18 | |
1103 | \& 19 sub check_num { | |
1104 | \& 20 my ($upstream, $cur_prime) = @_; | |
1105 | \& 21 my $kid; | |
1106 | \& 22 my $downstream = new Thread::Queue; | |
1107 | \& 23 while (my $num = $upstream->dequeue) { | |
1108 | \& 24 next unless $num % $cur_prime; | |
1109 | \& 25 if ($kid) { | |
1110 | \& 26 $downstream->enqueue($num); | |
1111 | \& 27 } else { | |
1112 | \& 28 print "Found prime $num\en"; | |
1113 | \& 29 $kid = new threads(\e&check_num, $downstream, $num); | |
1114 | \& 30 } | |
1115 | \& 31 } | |
1116 | \& 32 $downstream->enqueue(undef) if $kid; | |
1117 | \& 33 $kid->join if $kid; | |
1118 | \& 34 } | |
1119 | .Ve | |
1120 | .PP | |
1121 | This program uses the pipeline model to generate prime numbers. Each | |
1122 | thread in the pipeline has an input queue that feeds numbers to be | |
1123 | checked, a prime number that it's responsible for, and an output queue | |
1124 | into which it funnels numbers that have failed the check. If the thread | |
1125 | has a number that's failed its check and there's no child thread, then | |
1126 | the thread must have found a new prime number. In that case, a new | |
1127 | child thread is created for that prime and stuck on the end of the | |
1128 | pipeline. | |
1129 | .PP | |
1130 | This probably sounds a bit more confusing than it really is, so let's | |
1131 | go through this program piece by piece and see what it does. (For | |
1132 | those of you who might be trying to remember exactly what a prime | |
1133 | number is, it's a number that's only evenly divisible by itself and 1.) | |
1134 | .PP | |
1135 | The bulk of the work is done by the \fIcheck_num()\fR subroutine, which | |
1136 | takes a reference to its input queue and a prime number that it's | |
1137 | responsible for. After pulling in the input queue and the prime that | |
1138 | the subroutine's checking (line 20), we create a new queue (line 22) | |
1139 | and reserve a scalar for the thread that we're likely to create later | |
1140 | (line 21). | |
1141 | .PP | |
1142 | The while loop from line 23 to line 31 grabs a scalar off the input | |
1143 | queue and checks against the prime this thread is responsible | |
1144 | for. Line 24 checks to see if there's a remainder when we modulo the | |
1145 | number to be checked against our prime. If there is one, the number | |
1146 | must not be evenly divisible by our prime, so we need to either pass | |
1147 | it on to the next thread if we've created one (line 26) or create a | |
1148 | new thread if we haven't. | |
1149 | .PP | |
1150 | The new thread creation is line 29. We pass on to it a reference to | |
1151 | the queue we've created, and the prime number we've found. | |
1152 | .PP | |
1153 | Finally, once the loop terminates (because we got a 0 or undef in the | |
1154 | queue, which serves as a note to die), we pass on the notice to our | |
1155 | child and wait for it to exit if we've created a child (lines 32 and | |
1156 | 33). | |
1157 | .PP | |
1158 | Meanwhile, back in the main thread, we create a queue (line 9) and the | |
1159 | initial child thread (line 10), and pre-seed it with the first prime: | |
1160 | 2. Then we queue all the numbers from 3 to 1000 for checking (lines | |
1161 | 12\-14), then queue a die notice (line 16) and wait for the first child | |
1162 | thread to terminate (line 17). Because a child won't die until its | |
1163 | child has died, we know that we're done once we return from the join. | |
1164 | .PP | |
1165 | That's how it works. It's pretty simple; as with many Perl programs, | |
1166 | the explanation is much longer than the program. | |
1167 | .SH "Performance considerations" | |
1168 | .IX Header "Performance considerations" | |
1169 | The main thing to bear in mind when comparing ithreads to other threading | |
1170 | models is the fact that for each new thread created, a complete copy of | |
1171 | all the variables and data of the parent thread has to be taken. Thus | |
1172 | thread creation can be quite expensive, both in terms of memory usage and | |
1173 | time spent in creation. The ideal way to reduce these costs is to have a | |
1174 | relatively small number of long-lived threads, all created fairly early | |
1175 | on \- before the base thread has accumulated too much data. Of course, this | |
1176 | may not always be possible, so compromises have to be made. However, after | |
1177 | a thread has been created, its performance and extra memory usage should | |
1178 | be little different than ordinary code. | |
1179 | .PP | |
1180 | Also note that under the current implementation, shared variables | |
1181 | use a little more memory and are a little slower than ordinary variables. | |
1182 | .SH "Process-scope Changes" | |
1183 | .IX Header "Process-scope Changes" | |
1184 | Note that while threads themselves are separate execution threads and | |
1185 | Perl data is thread-private unless explicitly shared, the threads can | |
1186 | affect process-scope state, affecting all the threads. | |
1187 | .PP | |
1188 | The most common example of this is changing the current working | |
1189 | directory using \fIchdir()\fR. One thread calls \fIchdir()\fR, and the working | |
1190 | directory of all the threads changes. | |
1191 | .PP | |
1192 | An even more drastic example of a process-scope change is \fIchroot()\fR: | |
1193 | the root directory of all the threads changes, and no thread can | |
1194 | undo it (as opposed to \fIchdir()\fR). | |
1195 | .PP | |
1196 | Further examples of process-scope changes include \fIumask()\fR and | |
1197 | changing uids/gids. | |
1198 | .PP | |
1199 | Thinking of mixing \fIfork()\fR and threads? Please lie down and wait | |
1200 | until the feeling passes\*(-- but in case you really want to know, | |
1201 | the semantics is that \fIfork()\fR duplicates all the threads. | |
1202 | (In \s-1UNIX\s0, at least; other platforms will do something different.) | |
1203 | .PP | |
1204 | Similarly, mixing signals and threads should not be attempted. | |
1205 | Implementations are platform\-dependent, and even the \s-1POSIX\s0 | |
1206 | semantics may not be what you expect (and Perl doesn't even | |
1207 | give you the full \s-1POSIX\s0 \s-1API\s0). | |
1208 | .SH "Thread-Safety of System Libraries" | |
1209 | .IX Header "Thread-Safety of System Libraries" | |
1210 | Whether various library calls are thread-safe is outside the control | |
1211 | of Perl. Calls often suffering from not being thread-safe include: | |
1212 | \&\fIlocaltime()\fR, \fIgmtime()\fR, get{gr,host,net,proto,serv,pw}*(), \fIreaddir()\fR, | |
1213 | \&\fIrand()\fR, and \fIsrand()\fR \*(-- in general, calls that depend on some global | |
1214 | external state. | |
1215 | .PP | |
1216 | If the system Perl is compiled in has thread-safe variants of such | |
1217 | calls, they will be used. Beyond that, Perl is at the mercy of | |
1218 | the thread-safety or \-unsafety of the calls. Please consult your | |
1219 | C library call documentation. | |
1220 | .PP | |
1221 | In some platforms the thread-safe interfaces may fail if the result | |
1222 | buffer is too small (for example \fIgetgrent()\fR may return quite large | |
1223 | group member lists). Perl will retry growing the result buffer | |
1224 | a few times, but only up to 64k (for safety reasons). | |
1225 | .SH "Conclusion" | |
1226 | .IX Header "Conclusion" | |
1227 | A complete thread tutorial could fill a book (and has, many times), | |
1228 | but with what we've covered in this introduction, you should be well | |
1229 | on your way to becoming a threaded Perl expert. | |
1230 | .SH "Bibliography" | |
1231 | .IX Header "Bibliography" | |
1232 |