osm_map_processing/process_maps.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2016 Mike "KemoNine" Crosson
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import subprocess, sys, os, pprint, datetime, argparse, time

# Absolute directory containing this script (symlinks resolved); the bundled
# osmosis launcher and poi-mapping.xml are located relative to it.
base_path = os.path.split(os.path.realpath(__file__))[0]

# Private snapshot of the process environment, handed to the osmosis child
# processes so that JVM options can be added without touching os.environ.
env = dict(os.environ)

# Write-only handle on the null device, used to silence child-process output.
# It stays open for the lifetime of the script.
FNULL = open(os.devnull, mode='w')

# Names of the external helper programs; both are looked up on PATH.
bunzip2_cmd = 'bunzip2'
wget_cmd = 'wget'

def parse_map_urls(lines):
    """Return the non-blank entries of a map list, stripped of whitespace.

    ``lines`` is any iterable of strings (typically an open text file with
    one URL per line). Blank and whitespace-only lines are dropped so they
    never reach wget or trigger a throttle sleep.
    """
    return [url for url in (line.strip() for line in lines) if url]


def osmosis_reader_args(files):
    """Build the osmosis reader and merge tasks for ``files``.

    Files ending in ``osm`` are read with ``--rx`` and files ending in
    ``pbf`` with ``--rb``; anything else is ignored. Exactly one ``--merge``
    task is emitted per reader beyond the first, so ignored files never
    inflate the merge count. Returns an empty list when no file qualifies.
    """
    readers = []
    reader_count = 0
    for path in files:
        if path.endswith('osm'):
            readers.extend(['--rx', 'file=' + path])
        elif path.endswith('pbf'):
            readers.extend(['--rb', 'file=' + path])
        else:
            continue
        reader_count += 1
    return readers + ['--merge'] * max(reader_count - 1, 0)


if __name__ == '__main__':
    current_timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')

    parser = argparse.ArgumentParser()
    parser.add_argument('--map-list', action='append',
                        help='a text file with one map URL per line, can be specified more than once')
    parser.add_argument('--no-sleep', action='store_true',
                        help='don\'t sleep between downloads -- WARNING you can easily run into throttling on mirrors if you use this option')
    parser.add_argument('--use-ram', action='store_true',
                        help='use RAM for mapsforge processing -- WARNING mapsforge uses 10x the map size in RAM for processing (ie. 100Mb map = 1Gb RAM usage), you want a LOT of RAM for this option')
    parser.add_argument('--max-heap-space', action='store', default='4g',
                        help='set the max heap space for the JVM, use standard -Xmx values, default (4g) should be fine if not using --use-ram argument')
    parser.add_argument('--output-map-name', action='store', default='output',
                        help='set the output .map and .poi file names')
    parser.add_argument('--cached-maps-dir', action='store',
                        help='Use cached maps in the specified directory instead of downloads using map lists')
    #TODO: Add argument to pass path to osmosis
    #TODO: Add argument to pass path to output dir
    args = parser.parse_args()

    if args.map_list is None and args.cached_maps_dir is None:
        print('You MUST specify at least one map-list or cached-maps-dir')
        sys.exit(1)

    # Normalize the map directory from the CLI argument before any chdir.
    # Defaults to the relative 'dl' download directory, which is resolved
    # inside the per-run output directory entered below.
    # NOTE(review): when both --map-list and --cached-maps-dir are given, the
    # downloads land in 'dl' but only the cached directory is processed.
    cached_maps_dir = 'dl'
    if args.cached_maps_dir is not None:
        cached_maps_dir = os.path.abspath(args.cached_maps_dir)

    # Map lists are read before changing directory so that relative list
    # paths are interpreted against the directory the script was started in.
    map_list = []
    if args.map_list is not None:
        for alist in args.map_list:
            with open(alist, 'r') as maps:
                map_list.extend(parse_map_urls(maps))

    print('Creating working directories')
    # The JVM temp dir must exist at the exact path handed to Java below,
    # which is next to the script and not necessarily in the current directory.
    java_tmp_dir = os.path.join(base_path, 'tmp')
    os.makedirs(java_tmp_dir, exist_ok=True)
    os.makedirs('out', exist_ok=True)
    os.chdir('out')
    output_dir = current_timestamp + '-' + args.output_map_name
    os.makedirs(output_dir, exist_ok=True)
    os.chdir(output_dir)

    if args.map_list is not None:
        os.makedirs('dl', exist_ok=True)

        print('Downloading maps')
        last_index = len(map_list) - 1
        for index, url in enumerate(map_list):
            print('    ' + url)
            download = subprocess.run([wget_cmd, '-P', 'dl', url],
                                      stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
            if download.returncode != 0:
                # Best effort: keep going with the remaining maps.
                print('    WARNING: download failed (wget exit code ' + str(download.returncode) + ')')
            # Throttle only between downloads; nothing follows the last one.
            if not args.no_sleep and index < last_index:
                print('    Sleeping to prevent throttle/blocking')
                time.sleep(300) # Seconds

        print('Decompressing maps (if necessary)')
        for dirpath, dirnames, filenames in os.walk('dl'):
            for file in filenames:
                if file.endswith('bz2'):
                    print('    ' + file)
                    unpack = subprocess.run([bunzip2_cmd, os.path.join(dirpath, file)])
                    if unpack.returncode != 0:
                        print('    WARNING: decompression failed (bunzip2 exit code ' + str(unpack.returncode) + ')')

    # Runtime setup shared by both osmosis runs (maps AND POIs): point the
    # JVM temp dir at something saner than a tmpfs-backed /tmp.
    env['JAVACMD_OPTIONS'] = '-Xmx' + args.max_heap_space + ' -server -Djava.io.tmpdir=' + java_tmp_dir

    print('Finding maps to process')
    files_to_process = []
    for dirpath, dirnames, filenames in os.walk(cached_maps_dir):
        for file in filenames:
            found = os.path.join(dirpath, file)
            print('    Found map: ' + found)
            files_to_process.append(found)

    reader_args = osmosis_reader_args(files_to_process)
    if not reader_args:
        # Without at least one reader task osmosis has nothing to write.
        print('No .osm or .pbf maps found in ' + cached_maps_dir)
        sys.exit(1)

    osmosis_bin = os.path.join(base_path, 'bin', 'osmosis', 'bin', 'osmosis')

    print('Processing maps using osmosis')
    map_cmd = [osmosis_bin] + reader_args
    map_cmd.extend(['--mapfile-writer', 'file=' + args.output_map_name + '.map'])
    map_cmd.append('type=ram' if args.use_ram else 'type=hd')
    map_status = subprocess.run(map_cmd, env=env).returncode

    print('Processing POIs using osmosis')
    poi_cmd = [osmosis_bin] + reader_args
    poi_cmd.extend(['--poi-writer', 'file=' + args.output_map_name + '.poi',
                    'tag-conf-file=' + os.path.join(base_path, 'poi-mapping.xml')])
    poi_status = subprocess.run(poi_cmd, env=env).returncode

    # Both runs are always attempted; report failure through the exit code.
    if map_status != 0 or poi_status != 0:
        print('osmosis failed (map exit code ' + str(map_status) + ', POI exit code ' + str(poi_status) + ')')
        sys.exit(1)