百囚徒问题（100 prisoners problem）

该问题由丹麦计算机科学家Peter Bro Miltersen于2003年首次提出。

问题描述

在监狱中有100名囚犯，被编号为1-100号。典狱长决定给囚犯们一次特赦的机会，条件是通过一项挑战。在一个房间中放着一个有100个抽屉的橱柜，里面随机放着与囚犯编号对应的1-100的号码牌。挑战开始后，每个囚犯依次进入该房间，打开不超过半数的抽屉，并从中找到与自己对应的号码则为成功，每名囚犯出去时该橱柜恢复原样。从第一名囚犯进入直至最后一名囚犯出来期间不允许有任何交流，任何一名囚犯挑战失败都会导致所有囚犯死亡，只有全部成功才能够特赦该100名囚犯。如果囚犯们都随机打开50个抽屉，他们的生存几率微乎其微。所以囚犯们需要找到一个最佳策略，来提高生存率。

最佳策略

每个囚犯首先打开与自己号码对应的抽屉；
如果该抽屉里的号码牌是此囚犯的号码，则该囚犯挑战成功；
否则该抽屉中存放的是另一个囚犯对应的号码牌，接着用该号码牌对应的抽屉；
每名囚犯重复2和3的步骤，直到找到自己的号码牌或者打开了50个抽屉为止。

举例

使用8个囚犯和抽屉来进行演示，每个囚犯最多可以打开4个抽屉。典狱长将抽屉中的号码牌如下摆放。

抽屉号码	1	2	3	4	5	6	7	8
号码牌	7	4	6	8	1	3	5	2

囚犯的行为如下：

囚犯1首先打开抽屉1并找到数字7。然后他打开抽屉7并找到数字5。然后打开抽屉5并在其中成功找到自己的数字。
囚犯2依次打开抽屉2、4和8，并在最后一个抽屉中找到了自己的数字2。
囚犯3打开抽屉3和6，在其中找到自己的号码牌。
囚犯4打开抽屉4、8和2，在2号抽屉中找到自己的编号。请注意，这与囚犯2遇到的周期相同，但他不知道。
5至8号囚犯也将以相似的方式找到自己的号码。

这种情况下，囚犯们都能够找到自己的数字，但并非所有情况都如此幸运。例如，将抽屉5和8的号码牌互换，将导致1号囚犯找不到自己的号码牌。

代码实现

代码实现主要比较以下两种情况下的生存概率：

用数千个实例模拟囚犯随即打开抽屉
用数千个实例模拟囚犯使用最佳策略打开抽屉

C

#include<stdbool.h>
#include<stdlib.h>
#include<stdio.h>
#include<time.h>
 
#define LIBERTY false
#define DEATH true
 
typedef struct{ 
   
	int id;
	int cardNum;
	bool hasBeenOpened;
}drawer;
 
typedef struct{ 
   
	int id;
	bool foundCard;
}prisoner;
 
drawer *drawerSet;
prisoner *prisonerGang;
 
void initialize(int prisoners){ 
   
	int i,j,card;
	bool unique;
 
	drawerSet = (drawer*)malloc(prisoners * sizeof(drawer));
	prisonerGang = (prisoner*)malloc(prisoners * sizeof(prisoner));
 
	for(i=0;i<prisoners;i++){ 
   
		prisonerGang[i] = (prisoner){ 
   .id = i+1, .foundCard = false};
 
		card = rand()%prisoners + 1;
 
		if(i==0)
			drawerSet[i] = (drawer){ 
   .id = i+1, .cardNum = card, .hasBeenOpened = false};
		else{ 
   
			unique = false;
			while(unique==false){ 
   
				for(j=0;j<i;j++){ 
   
					if(drawerSet[j].cardNum == card){ 
   
						card = rand()%prisoners + 1;
						break;
					}
				}
				if(j==i){ 
   
					unique = true;
				}
			}
			drawerSet[i] = (drawer){ 
   .id = i+1, .cardNum = card, .hasBeenOpened = false};
		}
	}
}
 
void closeAllDrawers(int prisoners){ 
   
	int i;
	for(i=0;i<prisoners;i++)
		drawerSet[i].hasBeenOpened = false;
}
 
bool libertyOrDeathAtRandom(int prisoners,int chances){ 
   
	int i,j,chosenDrawer;
 
	for(i=0;i<prisoners;i++){ 
   
		for(j=0;j<chances;j++){ 
   
			do{ 
   
				chosenDrawer = rand()%prisoners;
			}while(drawerSet[chosenDrawer].hasBeenOpened==true);
			if(drawerSet[chosenDrawer].cardNum == prisonerGang[i].id){ 
   
				prisonerGang[i].foundCard = true;
				break;
			}
			drawerSet[chosenDrawer].hasBeenOpened = true;
		}
		closeAllDrawers(prisoners);
		if(prisonerGang[i].foundCard == false)
			return DEATH;
	}
 
	return LIBERTY;
}
 
bool libertyOrDeathPlanned(int prisoners,int chances){ 
   
	int i,j,chosenDrawer;
	for(i=0;i<prisoners;i++){ 
   
		chosenDrawer = rand()%prisoners;
		for(j=1;j<chances;j++){ 
   
			if(drawerSet[chosenDrawer].cardNum == prisonerGang[i].id){ 
   
				prisonerGang[i].foundCard = true;
				break;
			}
			if(chosenDrawer+1 == drawerSet[chosenDrawer].id){ 
   
				do{ 
   
                    chosenDrawer = rand()%prisoners;
				}while(drawerSet[chosenDrawer].hasBeenOpened==true);
			}
			else{ 
   
				chosenDrawer = drawerSet[chosenDrawer].cardNum - 1;
			}
			drawerSet[chosenDrawer].hasBeenOpened = true;
		}
		closeAllDrawers(prisoners);
		if(prisonerGang[i].foundCard == false)
			return DEATH;
	}
 
	return LIBERTY;
}
 
int main(int argc,char** argv)
{ 
   
	int prisoners, chances;
	unsigned long long int trials,i,count = 0;
        char* end;
 
	if(argc!=4)
		return printf("Usage : %s <Number of prisoners> <Number of chances> <Number of trials>",argv[0]);
 
	prisoners = atoi(argv[1]);
	chances = atoi(argv[2]);
	trials = strtoull(argv[3],&end,10);
 
	srand(time(NULL));
 
	printf("Running random trials...");
	for(i=0;i<trials;i+=1L){ 
   
		initialize(prisoners);
 
		count += libertyOrDeathAtRandom(prisoners,chances)==DEATH?0:1;
	}
 
	printf("\n\nGames Played : %llu\nGames Won : %llu\nChances : %lf % \n\n",trials,count,(100.0*count)/trials);
 
        count = 0;
 
	printf("Running strategic trials...");
	for(i=0;i<trials;i+=1L){ 
   
		initialize(prisoners);
 
		count += libertyOrDeathPlanned(prisoners,chances)==DEATH?0:1;
	}
 
	printf("\n\nGames Played : %llu\nGames Won : %llu\nChances : %lf % \n\n",trials,count,(100.0*count)/trials);
	return 0;
}

测试：

C:\My Projects\networks>a 100 50 100000
Running random trials...

Games Played : 100000
Games Won : 0
Chances : 0.000000%

Running strategic trials...

Games Played : 100000
Games Won : 0
Chances : 0.000000


C:\My Projects\networks>a 100 50 1000000
Running random trials...

Games Played : 1000000
Games Won : 0
Chances : 0.000000

Running strategic trials...

Games Played : 1000000
Games Won : 0
Chances : 0.000000

C#

using System;
using System.Linq;
 
namespace Prisoners { 
   
    class Program { 
   
        static bool PlayOptimal() { 
   
            var secrets = Enumerable.Range(0, 100).OrderBy(a => Guid.NewGuid()).ToList();
 
            for (int p = 0; p < 100; p++) { 
   
                bool success = false;
 
                var choice = p;
                for (int i = 0; i < 50; i++) { 
   
                    if (secrets[choice] == p) { 
   
                        success = true;
                        break;
                    }
                    choice = secrets[choice];
                }
 
                if (!success) { 
   
                    return false;
                }
            }
 
            return true;
        }
 
        static bool PlayRandom() { 
   
            var secrets = Enumerable.Range(0, 100).OrderBy(a => Guid.NewGuid()).ToList();
 
            for (int p = 0; p < 100; p++) { 
   
                var choices = Enumerable.Range(0, 100).OrderBy(a => Guid.NewGuid()).ToList();
 
                bool success = false;
                for (int i = 0; i < 50; i++) { 
   
                    if (choices[i] == p) { 
   
                        success = true;
                        break;
                    }
                }
 
                if (!success) { 
   
                    return false;
                }
            }
 
            return true;
        }
 
        static double Exec(uint n, Func<bool> play) { 
   
            uint success = 0;
            for (uint i = 0; i < n; i++) { 
   
                if (play()) { 
   
                    success++;
                }
            }
            return 100.0 * success / n;
        }
 
        static void Main() { 
   
            const uint N = 1_000_000;
            Console.WriteLine("# of executions: {0}", N);
            Console.WriteLine("Optimal play success rate: {0:0.00000000000}%", Exec(N, PlayOptimal));
            Console.WriteLine(" Random play success rate: {0:0.00000000000}%", Exec(N, PlayRandom));
        }
    }
}

输出：

# of executions: 1000000
Optimal play success rate: 31.21310000000%
 Random play success rate: 0.00000000000%

C++

#include <iostream> //for output
#include <algorithm> //for shuffle
#include <stdlib.h> //for rand()
 
using namespace std;
 
int* setDrawers() { 
   
	int drawers[100];
	for (int i = 0; i < 100; i++) { 
   
		drawers[i] = i;
	}
	random_shuffle(&drawers[0], &drawers[99]);
	return drawers;
}
 
bool playRandom()
{ 
   
	int* drawers = setDrawers();
	bool openedDrawers[100] = { 
    0 };
	for (int prisonerNum = 0; prisonerNum < 100; prisonerNum++) { 
   	//loops through prisoners numbered 0 through 99
		bool prisonerSuccess = false;
		for (int i = 0; i < 50; i++) { 
   	//loops through 50 draws for each prisoner 
			int drawerNum;
			while (true) { 
   
				drawerNum = rand() % 100;
				if (!openedDrawers[drawerNum]) { 
   
					openedDrawers[drawerNum] = true;
					cout << endl;
					break;
				}
			}
			if (*(drawers + drawerNum) == prisonerNum) { 
   
				prisonerSuccess = true;
				break;
			}
		}
		if (!prisonerSuccess)
			return false;
	}
	return true;
}
 
bool playOptimal()
{ 
   
	int* drawers = setDrawers();
	for (int prisonerNum = 0; prisonerNum < 100; prisonerNum++) { 
   
		bool prisonerSuccess = false;
		int checkDrawerNum = prisonerNum;
		for (int i = 0; i < 50; i++) { 
   
			if (*(drawers + checkDrawerNum) == prisonerNum) { 
   
				prisonerSuccess = true;
				break;
			}
			else
				checkDrawerNum = *(drawers + checkDrawerNum);
		}
		if (!prisonerSuccess)
			return false;
	}
	return true;
}
 
double simulate(string strategy)
{ 
   
	int numberOfSuccesses = 0;
	for (int i = 0; i <= 10000; i++) { 
   
		if ((strategy == "random" && playRandom()) || (strategy == "optimal" && playOptimal())) //will run playRandom or playOptimal but not both becuase of short-circuit evaluation
			numberOfSuccesses++;
	}
	return numberOfSuccesses / 100.0;
}
 
int main()
{ 
   
	cout << "Random Strategy: " << simulate("random") << "%" << endl;
	cout << "Optimal Strategy: " << simulate("optimal") << "%" << endl;
	system("PAUSE");
	return 0;
}

输出：

Random Strategy: 0%
Optimal Strategy: 31.51%

Go

package main
 
import (
    "fmt"
    "math/rand"
    "time"
)
 
// Uses 0-based numbering rather than 1-based numbering throughout.
func doTrials(trials, np int, strategy string) { 
   
    pardoned := 0
trial:
    for t := 0; t < trials; t++ { 
   
        var drawers [100]int
        for i := 0; i < 100; i++ { 
   
            drawers[i] = i
        }
        rand.Shuffle(100, func(i, j int) { 
   
            drawers[i], drawers[j] = drawers[j], drawers[i]
        })
    prisoner:
        for p := 0; p < np; p++ { 
   
            if strategy == "optimal" { 
   
                prev := p
                for d := 0; d < 50; d++ { 
   
                    this := drawers[prev]
                    if this == p { 
   
                        continue prisoner
                    }
                    prev = this
                }
            } else { 
   
                // Assumes a prisoner remembers previous drawers (s)he opened
                // and chooses at random from the others.
                var opened [100]bool
                for d := 0; d < 50; d++ { 
   
                    var n int
                    for { 
   
                        n = rand.Intn(100)
                        if !opened[n] { 
   
                            opened[n] = true
                            break
                        }
                    }
                    if drawers[n] == p { 
   
                        continue prisoner
                    }
                }
            }
            continue trial
        }
        pardoned++
    }
    rf := float64(pardoned) / float64(trials) * 100
    fmt.Printf(" strategy = %-7s pardoned = %-6d relative frequency = %5.2f%%\n\n", strategy, pardoned, rf)
}
 
func main() { 
   
    rand.Seed(time.Now().UnixNano())
    const trials = 100_000
    for _, np := range []int{ 
   10, 100} { 
   
        fmt.Printf("Results from %d trials with %d prisoners:\n\n", trials, np)
        for _, strategy := range [2]string{ 
   "random", "optimal"} { 
   
            doTrials(trials, np, strategy)
        }
    }
}

输出：

Results from 100000 trials with 10 prisoners:

  strategy = random   pardoned = 99     relative frequency =  0.10%

  strategy = optimal  pardoned = 31205  relative frequency = 31.20%

Results from 100000 trials with 100 prisoners:

  strategy = random   pardoned = 0      relative frequency =  0.00%

  strategy = optimal  pardoned = 31154  relative frequency = 31.15%

Java

import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
 
public class Main { 
   
    private static boolean playOptimal(int n) { 
   
        List<Integer> secretList = IntStream.range(0, n).boxed().collect(Collectors.toList());
        Collections.shuffle(secretList);
 
        prisoner:
        for (int i = 0; i < secretList.size(); ++i) { 
   
            int prev = i;
            for (int j = 0; j < secretList.size() / 2; ++j) { 
   
                if (secretList.get(prev) == i) { 
   
                    continue prisoner;
                }
                prev = secretList.get(prev);
            }
            return false;
        }
        return true;
    }
 
    private static boolean playRandom(int n) { 
   
        List<Integer> secretList = IntStream.range(0, n).boxed().collect(Collectors.toList());
        Collections.shuffle(secretList);
 
        prisoner:
        for (Integer i : secretList) { 
   
            List<Integer> trialList = IntStream.range(0, n).boxed().collect(Collectors.toList());
            Collections.shuffle(trialList);
 
            for (int j = 0; j < trialList.size() / 2; ++j) { 
   
                if (Objects.equals(trialList.get(j), i)) { 
   
                    continue prisoner;
                }
            }
 
            return false;
        }
        return true;
    }
 
    private static double exec(int n, int p, Function<Integer, Boolean> play) { 
   
        int succ = 0;
        for (int i = 0; i < n; ++i) { 
   
            if (play.apply(p)) { 
   
                succ++;
            }
        }
        return (succ * 100.0) / n;
    }
 
    public static void main(String[] args) { 
   
        final int n = 100_000;
        final int p = 100;
        System.out.printf("# of executions: %d\n", n);
        System.out.printf("Optimal play success rate: %f%%\n", exec(n, p, Main::playOptimal));
        System.out.printf("Random play success rate: %f%%\n", exec(n, p, Main::playRandom));
    }
}

输出：

# of executions: 100000
Optimal play success rate: 31.343000%
Random play success rate: 0.000000%

JavaScript

const _ = require('lodash');
 
const numPlays = 100000;
 
const setupSecrets = () => { 
   
	// setup the drawers with random cards
	let secrets = [];
 
	for (let i = 0; i < 100; i++) { 
   
		secrets.push(i);
	}
 
	return _.shuffle(secrets);
}
 
const playOptimal = () => { 
   
 
	let secrets = setupSecrets();
 
 
	// Iterate once per prisoner
	loop1:
	for (let p = 0; p < 100; p++) { 
   
 
		// whether the prisoner succeedss
		let success = false;
 
		// the drawer number the prisoner chose
		let choice = p;
 
 
		// The prisoner can choose up to 50 cards
		loop2:
		for (let i = 0; i < 50; i++) { 
   
 
			// if the card in the drawer that the prisoner chose is his card
			if (secrets[choice] === p){ 
   
				success = true;
				break loop2;
			}
 
			// the next drawer the prisoner chooses will be the number of the card he has.
			choice = secrets[choice];
 
		}	// each prisoner gets 50 chances
 
 
		if (!success) return false;
 
	} // iterate for each prisoner 
 
	return true;
}
 
const playRandom = () => { 
   
 
	let secrets = setupSecrets();
 
	// iterate for each prisoner 
	for (let p = 0; p < 100; p++) { 
   
 
		let choices = setupSecrets();
 
		let success = false;
 
		for (let i = 0; i < 50; i++) { 
   
 
			if (choices[i] === p) { 
   
				success = true;
				break;
			}
		}
 
		if (!success) return false;
	}
 
	return true;
}
 
const execOptimal = () => { 
   
 
	let success = 0;
 
	for (let i = 0; i < numPlays; i++) { 
   
 
		if (playOptimal()) success++;
 
	}
 
	return 100.0 * success / 100000;
}
 
const execRandom = () => { 
   
 
	let success = 0;
 
	for (let i = 0; i < numPlays; i++) { 
   
 
		if (playRandom()) success++;
 
	}
 
	return 100.0 * success / 100000;
}
 
console.log("# of executions: " + numPlays);
console.log("Optimal Play Success Rate: " + execOptimal());
console.log("Random Play Success Rate: " + execRandom());

Python

import random
 
def play_random(n):
    # using 0-99 instead of ranges 1-100
    pardoned = 0
    in_drawer = list(range(100))
    sampler = list(range(100))
    for _round in range(n):
        random.shuffle(in_drawer)
        found = False
        for prisoner in range(100):
            found = False
            for reveal in random.sample(sampler, 50):
                card = in_drawer[reveal]
                if card == prisoner:
                    found = True
                    break
            if not found:
                break
        if found:
            pardoned += 1
    return pardoned / n * 100   # %
 
def play_optimal(n):
    # using 0-99 instead of ranges 1-100
    pardoned = 0
    in_drawer = list(range(100))
    for _round in range(n):
        random.shuffle(in_drawer)
        for prisoner in range(100):
            reveal = prisoner
            found = False
            for go in range(50):
                card = in_drawer[reveal]
                if card == prisoner:
                    found = True
                    break
                reveal = card
            if not found:
                break
        if found:
            pardoned += 1
    return pardoned / n * 100   # %
 
if __name__ == '__main__':
    n = 100_000
    print(" Simulation count:", n)
    print(f" Random play wins: {play_random(n):4.1f}% of simulations")
    print(f"Optimal play wins: {play_optimal(n):4.1f}% of simulations")

输出：

 Simulation count: 100000
 Random play wins:  0.0% of simulations
Optimal play wins: 31.1% of simulations

今天的文章百囚徒问题（100 prisoners problem）分享到此就结束了，感谢您的阅读。

版权声明：本文内容由互联网用户自发贡献，该文观点仅代表作者本人。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容，请发送邮件至举报，一经查实，本站将立刻删除。
如需转载请保留出处：https://bianchenghao.cn/63648.html