Введение:
Этот код является расширением оригинального кода Монте-Карло Эдвина, который улучшает эвристическую функцию, а также добавляет в код функцию грубой силы, чтобы повысить эффективность кодов по времени.
Это был проект, реализованный группой людей, которой были я и человек по имени Эдвин. В то время как я отвечал за код наивысшего балла, Эдвин отвечал за код Монте-Карло, и, поскольку они оба различны, я полагал, что они оба заслуживают отдельной статьи. У обоих есть свои преимущества и недостатки, и при применении реального ИИ метод Монте-Карло был бы единственным жизнеспособным вариантом, потому что наивысший результат имеет обманный характер, поскольку он может выбрать лучшую доску из вариантов многих досок. Тем не менее, по-прежнему очень приятно видеть, как код получает выигрышную доску (2048 плиток) менее чем за секунду, используя код.
Чтобы увидеть результаты кодов, я прилагаю график результатов кода, чтобы вы могли сами оценить его производительность. Обратите внимание, что наша система подсчета очков немного отличается от реальной системы подсчета очков 2048 года, где счет определяется суммой всех плиток на сетке.
После того, как Эдвин создал обновленный код, как вы можете видеть из выходных данных терминала, он смог стабильно достигать сеток 2048/4096.
import random import numpy as np import sys import time from itertools import product ROW_LENGTH = 4 STARTING_NUMBERS = 2 CHANCE_OF_TWO= 90 PLAYER_SCORE = 0 TOTAL_MOVES=0 NUMBER_OF_RUNS=1 POSSIBLE_MOVES=["up", "right", "down", "left"] SIMULATIONS=50 DEPTH = 2 POSSIBLE_ARRANGEMENTS = product(POSSIBLE_MOVES,repeat=DEPTH) def generateTile(): RANDOM_NUM = random.randint(1, 100) if RANDOM_NUM<CHANCE_OF_TWO: number = 2 else: number= 4 return number def createGrid(dimensions): grid = [[0 for i in range(dimensions)] for j in range(dimensions)] for i in range(STARTING_NUMBERS): number = generateTile() grid[random.randint(0,ROW_LENGTH-1)][random.randint(0,ROW_LENGTH-1)]= number displayGrid(grid) return grid def displayGrid(grid): print(np.array(grid)) def evalgrid(grid): value = np.sum(np.array(grid)) return value def evalgrid2(grid): value = np.sum(np.array(grid)*TEMPLATE) + np.amax(grid)**2 return value def move(direction, grid, score): if direction=="left" or direction=="right": for i in range(ROW_LENGTH): if direction=="right": grid[i]= grid[i][::-1] for j in range(grid[i].count(0)): grid[i].append(grid[i].pop(grid[i].index(0))) ##finds nearest 0 to the start of a row, pops it and then appends the popped value to the end aka it compresses the row for element in range(0,ROW_LENGTH-1): # adds the numbers if grid[i][element]==grid[i][element+1]: score+=grid[i][element]*2 grid[i][element]=grid[i][element]*2 grid[i].remove(grid[i][element+1]) grid[i].append(0) if direction == "right": grid[i] = grid[i][::-1] return grid, score else: collection = [grid[j][i] for i in range(0, ROW_LENGTH)for j in range(0, ROW_LENGTH)] vGrid= [collection[i*ROW_LENGTH:((i+1)*ROW_LENGTH)] for i in range(ROW_LENGTH)] for i in range(ROW_LENGTH): if direction == "down": vGrid[i] = vGrid[i][::-1] for j in range(vGrid[i].count(0)): vGrid[i].append(vGrid[i].pop(vGrid[i].index(0))) for element in range(0,ROW_LENGTH-1): # adds the numbers if vGrid[i][element]==vGrid[i][element+1]: score+=grid[i][element]*2 vGrid[i][element]=vGrid[i][element]*2 vGrid[i].remove(vGrid[i][element+1]) vGrid[i].append(0) if direction == "down": vGrid[i] = vGrid[i][::-1] for row in range(ROW_LENGTH): for column in range(ROW_LENGTH): grid[row][column]= vGrid[column][row] return grid,score def check(grid): gridArr = [] for direction in POSSIBLE_MOVES: temp = [x[:] for x in grid] gridArr.append(move(direction, temp, PLAYER_SCORE)) ##creates array of all possible moves for a single grid for potentialGrid in gridArr: if grid!=potentialGrid[0]: ## if any of the simulated arrays are different from the original one then the game is not over return False return True def updateGrid(grid, state): count = 0 for i in range(ROW_LENGTH): count+= grid[i].count(0) if not count: if check(grid): ##check(grid) is true when game is over return grid, True ##terminates game, see bestmove function if state==True: ##state is declared in bestmove() and the game loop. It just says whether a new tile needs to be added or not while True: row=random.randint(0,ROW_LENGTH-1) column=random.randint(0,ROW_LENGTH-1) if grid[row][column]==0: grid[row][column]=generateTile() return grid, False return grid, False def bestmove(grid, currentscore): finished = check(grid) best = ["up", 0] if finished: return "gameover" else: for direction in POSSIBLE_MOVES: evaluation = 0 for i in range(SIMULATIONS): tempgrid = [x[:] for x in grid] tempgrid = move(direction, tempgrid, currentscore)[0] ## stores all the possible grids after 1 move (doesn't consider random tile placed) if tempgrid != grid: tempgrid = updateGrid(tempgrid, True)[0] ##add a tile if the board has changed while True: ##run random moves until the simulated game terminates, evaluate the final grid and added it to the evaluation variable random_move = random.choice(POSSIBLE_MOVES) placeholder = [x[:] for x in tempgrid] tempgrid = move(random_move, tempgrid, 0)[0] if placeholder==tempgrid: tempgrid, ended = updateGrid(tempgrid, False) if ended: evaluation += evalgrid(tempgrid) ##evaluates how good the final grid is after it is completed using random moves break else: tempgrid, ended = updateGrid(tempgrid, True) ##ended isn't referenced because if the new grid is different then game isnt over if ended: evaluation += evalgrid(tempgrid) print("hi")##evaluates how good the final grid is after it is completed using random moves break if evaluation > best[1]: ##if evaluation for a direction is better than the current best move then the new direction becomes the best move best = [direction, evaluation] return best[0] c=0 def bruteforce(grid, depth, score, moves): global c if depth == 0: if c%1==0: pass #print(moves, c) #displayGrid(grid) #print("\n") else: best = ["up", 0] for direction in POSSIBLE_MOVES: tempgrid = [x[:] for x in grid] tempgrid = move(direction, tempgrid, score)[0] ##intentionally not adding piece yet if tempgrid != grid: evaluation = 0 for row in range(ROW_LENGTH): for col in range(ROW_LENGTH): if tempgrid[row][col] == 0: #pass tempgrid[row][col] = 2 c+=1 evaluation+=evalgrid2(tempgrid) bruteforce(tempgrid, depth-1, score, "{} -> {}".format(moves, direction)) tempgrid[row][col] = 0 if evaluation > best[1]: best = [direction, evaluation] return best[0] #grid = createGrid(ROW_LENGTH) #bruteforce(grid, DEPTH, PLAYER_SCORE, "Start") for i in range(NUMBER_OF_RUNS): start =time.time() board = createGrid(ROW_LENGTH) while board: #PLAYER_CURRENT_MOVE= input(f"Enter your move, your current score is {PLAYER_SCORE}") #PLAYER_CURRENT_MOVE=random.choice(POSSIBLE_MOVES) #print(board.count(0)) counter = 0 for i in board: counter += i.count(0) if counter <=2: PLAYER_CURRENT_MOVE = bestmove(board, PLAYER_SCORE) else: PLAYER_CURRENT_MOVE = bruteforce(board, DEPTH,PLAYER_SCORE,"START") placeholder = [x[:] for x in board] if PLAYER_CURRENT_MOVE[0] in ["r", "R"]: board, PLAYER_SCORE = move("right", board, PLAYER_SCORE) elif PLAYER_CURRENT_MOVE[0] in ["l","L","a"]: board, PLAYER_SCORE = move("left", board, PLAYER_SCORE) elif PLAYER_CURRENT_MOVE[0] in ["u","U","w"]: board, PLAYER_SCORE = move("up", board, PLAYER_SCORE) elif PLAYER_CURRENT_MOVE[0] in ["d","D","s"]: board, PLAYER_SCORE = move("down", board, PLAYER_SCORE) elif PLAYER_CURRENT_MOVE == "gameover": print(TOTAL_MOVES, PLAYER_CURRENT_MOVE, time.time()-start) displayGrid(board) print("\n") break else: print("Please enter a valid move: up, down, left or right") if placeholder==board: board, ended = updateGrid(board, False) ##if after a move the grid hasn't changed then a tile is not added if check(board): print(TOTAL_MOVES) displayGrid(board) #sys.exit() break else: board, ended = updateGrid(board, True) ##if the move has done something a tile is added if TOTAL_MOVES%100 == 0: ##shows grid progression after every n moves print(TOTAL_MOVES, PLAYER_CURRENT_MOVE) displayGrid(board) print("\n") TOTAL_MOVES+=1 sys.exit()
Улучшения:
def evalgrid(grid): value = np.sum(np.array(grid)) return value
Мы изменили эвристическую функцию, убрав шаблон, на который мы ее умножали, так как это никак не помогало коду. Таким образом, это означало, что функция подсчета очков просто суммировала все значения в сетке.
def bruteforce(grid, depth, score, moves): global c if depth == 0: if c%1==0: pass #print(moves, c) #displayGrid(grid) #print("\n") else: best = ["up", 0] for direction in POSSIBLE_MOVES: tempgrid = [x[:] for x in grid] tempgrid = move(direction, tempgrid, score)[0] ##intentionally not adding piece yet if tempgrid != grid: evaluation = 0 for row in range(ROW_LENGTH): for col in range(ROW_LENGTH): if tempgrid[row][col] == 0: #pass tempgrid[row][col] = 2 c+=1 evaluation+=evalgrid2(tempgrid) bruteforce(tempgrid, depth-1, score, "{} -> {}".format(moves, direction)) tempgrid[row][col] = 0 if evaluation > best[1]: best = [direction, evaluation] return best[0]
Brute force предлагает более быстрый способ получить готовую сетку и, следовательно, ускоряет код.