osm_map_processing/process_maps.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2016 Mike "KemoNine" Crosson
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import subprocess, sys, os, pprint, datetime, argparse, time

# Directory holding this script. The bundled osmosis launcher, the POI tag
# mapping file and the JVM temp dir are all located relative to it.
base_path = os.path.dirname(os.path.realpath(__file__))
# Private copy of the environment; JAVACMD_OPTIONS is added to it before it is
# handed to the osmosis child processes, leaving os.environ itself untouched.
env = os.environ.copy()

# Sink used to silence child-process output. subprocess.DEVNULL is a special
# value understood by subprocess, so no file handle is opened at import time
# and nothing is left unclosed when the script exits.
FNULL = subprocess.DEVNULL

# External tools, given as bare names so they are resolved through PATH.
wget_cmd = 'wget'
bunzip2_cmd = 'bunzip2'

def osmosis_reader_args(map_files):
    """Return the osmosis input arguments for *map_files*.

    Files ending in ``osm`` are read with ``--rx`` (XML reader) and files
    ending in ``pbf`` with ``--rb`` (binary reader); every other file is
    ignored. One ``--merge`` task is appended for each reader beyond the
    first, because each merge combines two streams into one. The merge count
    is derived from the readers actually emitted, not from ``len(map_files)``,
    so stray files in the maps directory cannot unbalance the pipeline.

    An empty list is returned when no usable map file is present.
    """
    reader_args = []
    reader_count = 0
    for map_file in map_files:
        if map_file.endswith('osm'):
            reader_flag = '--rx'
        elif map_file.endswith('pbf'):
            reader_flag = '--rb'
        else:
            continue
        reader_args.extend([reader_flag, 'file=' + map_file])
        reader_count += 1
    reader_args.extend(['--merge'] * max(reader_count - 1, 0))
    return reader_args


if __name__ == '__main__':
    current_timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')

    parser = argparse.ArgumentParser()
    parser.add_argument('--map-list', action='append',
                        help='a text file with one map URL per line, can be specified more than once')
    parser.add_argument('--no-sleep', action='store_true',
                        help='don\'t sleep between downloads -- WARNING you can easily run into throttling on mirrors if you use this option')
    parser.add_argument('--use-ram', action='store_true',
                        help='use RAM for mapsforge processing -- WARNING mapsforge uses 10x the map size in RAM for processing (ie. 100Mb map = 1Gb RAM usage), you want a LOT of RAM for this option')
    parser.add_argument('--max-heap-space', action='store', default='4g',
                        help='set the max heap space for the JVM, use standard -Xmx values, default (4g) should be fine if not using --use-ram argument')
    parser.add_argument('--output-map-name', action='store', default='output',
                        help='set the output .map and .poi file names')
    parser.add_argument('--cached-maps-dir', action='store',
                        help='Use cached maps in the specified directory instead of downloads using map lists')
    #TODO: Add argument to pass path to osmosis
    #TODO: Add argument to pass path to output dir
    args = parser.parse_args()

    if args.map_list is None and args.cached_maps_dir is None:
        print('You MUST specify at least one map-list or cached-maps-dir')
        sys.exit(1)

    # Resolve the cache directory BEFORE any chdir below, so a relative path
    # given on the command line keeps pointing at the directory the user meant.
    # The default 'dl' is deliberately left relative: it refers to the download
    # directory created inside the per-run output directory.
    cached_maps_dir = 'dl'
    if args.cached_maps_dir is not None:
        cached_maps_dir = os.path.abspath(args.cached_maps_dir)

    # Collect the URLs from every map list, skipping blank lines so they do
    # not turn into empty wget invocations (and pointless throttle sleeps).
    map_list = []
    if args.map_list is not None:
        for alist in args.map_list:
            with open(alist, 'r') as maps:
                for line in maps:
                    url = line.strip()
                    if url:
                        map_list.append(url)

    print('Creating working directories')
    # The JVM is pointed at <script dir>/tmp below, so create exactly that
    # directory rather than a 'tmp' relative to the current directory.
    java_tmp_dir = os.path.join(base_path, 'tmp')
    os.makedirs(java_tmp_dir, exist_ok=True)
    output_dir = os.path.join('out', current_timestamp + '-' + args.output_map_name)
    os.makedirs(output_dir, exist_ok=True)
    os.chdir(output_dir)

    if args.map_list is not None:
        os.makedirs('dl', exist_ok=True)

        print('Downloading maps')
        last_index = len(map_list) - 1
        for index, url in enumerate(map_list):
            print('    ' + url)
            download = subprocess.run([wget_cmd, '-P', 'dl', url], stdout=FNULL, stderr=subprocess.STDOUT)
            if download.returncode != 0:
                # Best effort: keep going with the remaining maps, but say so.
                print('    WARNING: download failed (wget exit code ' + str(download.returncode) + ')')
            # No need to wait after the final download; nothing follows it.
            if not args.no_sleep and index < last_index:
                print('    Sleeping to prevent throttle/blocking')
                time.sleep(300) # Seconds

        print('Decompressing maps (if necessary)')
        for dirpath, dirnames, filenames in os.walk('dl'):
            for filename in filenames:
                if filename.endswith('bz2'):
                    print('    ' + filename)
                    unpack = subprocess.run([bunzip2_cmd, os.path.join(dirpath, filename)])
                    if unpack.returncode != 0:
                        print('    WARNING: decompression failed (bunzip2 exit code ' + str(unpack.returncode) + ')')

    # Setup various runtime aspects (going to do multiple osmosis runs (maps AND POIs)
    # Setup java temp dir to something a bit more sane (tmpfs /tmp for the loss)
    env['JAVACMD_OPTIONS'] = '-Xmx' + args.max_heap_space + ' -server -Djava.io.tmpdir=' + java_tmp_dir

    # NOTE: when --cached-maps-dir is given, only that directory is scanned,
    # even if maps were also downloaded into ./dl via --map-list.
    print('Finding maps to process')
    files_to_process = []
    for dirpath, dirnames, filenames in os.walk(cached_maps_dir):
        for filename in filenames:
            map_path = os.path.join(dirpath, filename)
            print('    Found map: ' + map_path)
            files_to_process.append(map_path)

    reader_args = osmosis_reader_args(files_to_process)
    if not reader_args:
        # Running osmosis with a writer but no reader can only fail.
        print('No osm or pbf maps found to process in ' + cached_maps_dir)
        sys.exit(1)

    osmosis_bin = os.path.join(base_path, 'bin', 'osmosis', 'bin', 'osmosis')
    failed_steps = []

    print('Processing maps using osmosis')
    map_cmd = [osmosis_bin] + reader_args
    map_cmd.extend(['--mapfile-writer', 'file=' + args.output_map_name + '.map'])
    map_cmd.append('type=ram' if args.use_ram else 'type=hd')
    if subprocess.run(map_cmd, env=env).returncode != 0:
        failed_steps.append('maps')

    print('Processing POIs using osmosis')
    poi_cmd = [osmosis_bin] + reader_args
    poi_cmd.extend(['--poi-writer', 'file=' + args.output_map_name + '.poi', 'tag-conf-file=' + os.path.join(base_path, 'poi-mapping.xml')])
    if subprocess.run(poi_cmd, env=env).returncode != 0:
        failed_steps.append('POIs')

    # Surface osmosis failures through the exit status instead of always
    # reporting success.
    if failed_steps:
        print('osmosis failed while processing: ' + ', '.join(failed_steps))
        sys.exit(1)
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`

Added licencing 2016-01-12 22:51:36 +00:00			`# Copyright 2016 Mike "KemoNine" Crosson`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`import subprocess, sys, os, pprint, datetime, argparse, time`

			`base_path = os.path.dirname(os.path.realpath(__file__))`
Add temp dir to variable used by osmosis startup script to prevent crash on run 2016-01-13 00:00:16 +00:00			`env = os.environ.copy()`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00
			`FNULL = open(os.devnull, 'w')`

Misc updates/fixes (see below) - Added -server to JVM options to help increase performance during long-running creations (per osmosis perf tuning) - Improved command help text and usage info - Adjusted how wget/bunzip2 are found (improves reliability in finding them) - Added option to NOT sleep between downloads (use this at your own risk, mirrors WILL throttle generally) - Added option to use RAM instead of HD for processing (mapsforge eats RAM, be careful with this) 2017-01-20 17:29:54 +00:00			`wget_cmd = 'wget'`
			`bunzip2_cmd = 'bunzip2'`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00
			`if __name__ == '__main__':`
			`current_timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')`

			`parser = argparse.ArgumentParser()`
Misc updates/fixes (see below) - Added -server to JVM options to help increase performance during long-running creations (per osmosis perf tuning) - Improved command help text and usage info - Adjusted how wget/bunzip2 are found (improves reliability in finding them) - Added option to NOT sleep between downloads (use this at your own risk, mirrors WILL throttle generally) - Added option to use RAM instead of HD for processing (mapsforge eats RAM, be careful with this) 2017-01-20 17:29:54 +00:00			`parser.add_argument('--map-list', action='append',`
			`help='a text file with one map URL per line, can be specified more than once')`
			`parser.add_argument('--no-sleep', action='store_true',`
			`help='don\'t sleep between downloads -- WARNING you can easily run into throttling on mirrors if you use this option')`
			`parser.add_argument('--use-ram', action='store_true',`
			`help='use RAM for mapsforge processing -- WARNING mapsforge uses 10x the map size in RAM for processing (ie. 100Mb map = 1Gb RAM usage), you want a LOT of RAM for this option')`
Added ability to adjust heap size -- most useful when using --use-ram argument 2017-01-20 19:24:41 +00:00			`parser.add_argument('--max-heap-space', action='store', default='4g',`
			`help='set the max heap space for the JVM, use standard -Xmx values, default (4g) should be fine if not using --use-ram argument')`
Add ability to specify output file names 2018-02-25 19:01:12 +00:00			`parser.add_argument('--output-map-name', action='store', default='output',`
			`help='set the output .map and .poi file names')`
Add support for map download caching 2018-02-25 20:09:36 +00:00			`parser.add_argument('--cached-maps-dir', action='store',`
			`help='Use cached maps in the specified directory instead of downloads using map lists')`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`#TODO: Add argument to pass path to osmosis`
			`#TODO: Add argument to pass path to output dir`
			`args = parser.parse_args()`

Add support for map download caching 2018-02-25 20:09:36 +00:00			`if args.map_list is None and args.cached_maps_dir is None:`
			`print('You MUST specify at least one map-list or cached-maps-dir')`
			`sys.exit(1)`

			`# Normalize map path directory based on CLI arg ahead of any path manipulations`
			`# DEFAULTS TO 'dl' (non-normalized) to retain original behavior of script`
			`cached_maps_dir = 'dl'`
			`if args.cached_maps_dir is not None:`
			`cached_maps_dir = os.path.abspath(args.cached_maps_dir)`

Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`map_list = []`
Add support for map download caching 2018-02-25 20:09:36 +00:00
			`if args.map_list is not None:`
			`for alist in args.map_list:`
			`with open(alist, 'r') as maps:`
			`for line in maps:`
			`map_list.append(line.strip())`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00
			`print('Creating working directories')`
Added on-disk processing of maps due to in-memory being "too big" for "large" map processing 2016-01-12 22:20:07 +00:00			`if not os.path.exists('tmp'):`
			`os.makedirs('tmp')`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`if not os.path.exists('out'):`
			`os.makedirs('out')`
			`os.chdir('out')`
Add support for map download caching 2018-02-25 20:09:36 +00:00			`output_dir = current_timestamp + '-' + args.output_map_name`
			`if not os.path.exists(output_dir):`
			`os.makedirs(output_dir)`
			`os.chdir(output_dir)`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00
Add support for map download caching 2018-02-25 20:09:36 +00:00			`if args.map_list is not None:`
			`if not os.path.exists('dl'):`
			`os.makedirs('dl')`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00
Add support for map download caching 2018-02-25 20:09:36 +00:00			`print('Downloading maps')`
			`for line in map_list:`
			`print(' ', end='')`
			`print(line)`
			`subprocess.run([wget_cmd, '-P', 'dl', line.strip()], stdout=FNULL, stderr=subprocess.STDOUT)`
			`if not args.no_sleep:`
			`print(' Sleeping to prevent throttle/blocking')`
			`time.sleep(300) # Seconds`

			`print('Decompressing maps (if necessary)')`
			`for dirpath, dirnames, filenames in os.walk('dl'):`
			`for file in filenames:`
			`if file.endswith('bz2'):`
			`print(' ', end='')`
			`print(file)`
			`subprocess.run([bunzip2_cmd, os.path.join(dirpath, file)])`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00
Added POI processing using mapsforge POI writer 2016-08-28 18:05:11 +00:00			`# Setup various runtime aspects (going to do multiple osmosis runs (maps AND POIs)`
Added ability to adjust heap size -- most useful when using --use-ram argument 2017-01-20 19:24:41 +00:00			`env['JAVACMD_OPTIONS'] = '-Xmx' + args.max_heap_space + ' -server -Djava.io.tmpdir=' + os.path.join(base_path, 'tmp') # Setup java temp dir to something a bit more sane (tmpfs /tmp for the loss)`
Add support for map download caching 2018-02-25 20:09:36 +00:00			`print('Finding maps to process')`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`files_to_process = []`
Add support for map download caching 2018-02-25 20:09:36 +00:00			`for dirpath, dirnames, filenames in os.walk(cached_maps_dir):`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`for file in filenames:`
			`print(' Found map: ', end='')`
			`print(os.path.join(dirpath, file))`
			`files_to_process.append(os.path.join(dirpath, file))`
Added POI processing using mapsforge POI writer 2016-08-28 18:05:11 +00:00
			`print('Processing maps using osmosis')`
Add temp dir to variable used by osmosis startup script to prevent crash on run 2016-01-13 00:00:16 +00:00			`osmosis_cmd = [os.path.join(base_path, 'bin', 'osmosis', 'bin', 'osmosis')]`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00			`for file in files_to_process:`
			`if file.endswith('osm'):`
			`osmosis_cmd.extend(['--rx', 'file=' + file])`
			`elif file.endswith('pbf'):`
			`osmosis_cmd.extend(['--rb', 'file=' + file])`
			`for x in range(0, len(files_to_process) - 1):`
			`osmosis_cmd.append('--merge')`
Add ability to specify output file names 2018-02-25 19:01:12 +00:00			`osmosis_cmd.extend(['--mapfile-writer', 'file=' + args.output_map_name + '.map'])`
Misc updates/fixes (see below) - Added -server to JVM options to help increase performance during long-running creations (per osmosis perf tuning) - Improved command help text and usage info - Adjusted how wget/bunzip2 are found (improves reliability in finding them) - Added option to NOT sleep between downloads (use this at your own risk, mirrors WILL throttle generally) - Added option to use RAM instead of HD for processing (mapsforge eats RAM, be careful with this) 2017-01-20 17:29:54 +00:00			`if args.use_ram:`
			`osmosis_cmd.extend(['type=ram'])`
			`else:`
			`osmosis_cmd.extend(['type=hd'])`
Add temp dir to variable used by osmosis startup script to prevent crash on run 2016-01-13 00:00:16 +00:00			`cmd = subprocess.Popen(osmosis_cmd, env=env)`
			`cmd.wait()`
Added test map list, implemented map processing including combining into single output map file 2016-01-12 21:17:06 +00:00
Added POI processing using mapsforge POI writer 2016-08-28 18:05:11 +00:00			`print('Processing POIs using osmosis')`
			`osmosis_cmd = [os.path.join(base_path, 'bin', 'osmosis', 'bin', 'osmosis')]`
			`for file in files_to_process:`
			`if file.endswith('osm'):`
			`osmosis_cmd.extend(['--rx', 'file=' + file])`
			`elif file.endswith('pbf'):`
			`osmosis_cmd.extend(['--rb', 'file=' + file])`
			`for x in range(0, len(files_to_process) - 1):`
			`osmosis_cmd.append('--merge')`
Add ability to specify output file names 2018-02-25 19:01:12 +00:00			`osmosis_cmd.extend(['--poi-writer', 'file=' + args.output_map_name + '.poi', 'tag-conf-file=' + os.path.join(base_path, 'poi-mapping.xml')])`
Added POI processing using mapsforge POI writer 2016-08-28 18:05:11 +00:00			`cmd = subprocess.Popen(osmosis_cmd, env=env)`
			`cmd.wait()`