Libosmium  v2.23.0
Fast and flexible C++ library for working with OpenStreetMap data
Loading...
Searching...
No Matches
bzip2_compression.hpp
Go to the documentation of this file.
1#ifndef OSMIUM_IO_BZIP2_COMPRESSION_HPP
2#define OSMIUM_IO_BZIP2_COMPRESSION_HPP
3
4/*
5
6This file is part of Osmium (https://osmcode.org/libosmium).
7
8Copyright 2013-2026 Jochen Topf <jochen@topf.org> and others (see README).
9
10Boost Software License - Version 1.0 - August 17th, 2003
11
12Permission is hereby granted, free of charge, to any person or organization
13obtaining a copy of the software and accompanying documentation covered by
14this license (the "Software") to use, reproduce, display, distribute,
15execute, and transmit the Software, and to prepare derivative works of the
16Software, and to permit third-parties to whom the Software is furnished to
17do so, all subject to the following:
18
19The copyright notices in the Software and this entire statement, including
20the above license grant, this restriction and the following disclaimer,
21must be included in all copies of the Software, in whole or in part, and
22all derivative works of the Software, unless such copies or derivative
23works are solely in the form of machine-executable object code generated by
24a source language processor.
25
26THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
29SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
30FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
31ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32DEALINGS IN THE SOFTWARE.
33
34*/
35
#include <osmium/io/compression.hpp>
#include <osmium/io/detail/read_write.hpp>
#include <osmium/io/error.hpp>
#include <osmium/io/file_compression.hpp>
#include <osmium/io/writer_options.hpp>
#include <osmium/util/file.hpp>

#include <bzlib.h>

#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <string>
#include <system_error>

#ifndef _MSC_VER
# include <unistd.h>
#endif
64
65namespace osmium {
66
71 struct bzip2_error : public io_error {
72
74 int system_errno = 0;
75
76 bzip2_error(const std::string& what, const int error_code) :
77 io_error(what),
78 bzip2_error_code(error_code) {
79 if (error_code == BZ_IO_ERROR) {
80 system_errno = errno;
81 }
82 }
83
84 }; // struct bzip2_error
85
86 namespace io {
87
88 namespace detail {
89
90 [[noreturn]] inline void throw_bzip2_error(BZFILE* bzfile, const char* msg, const int bzlib_error) {
91 std::string error{"bzip2 error: "};
92 error += msg;
93 error += ": ";
94 int errnum = bzlib_error;
95 if (bzlib_error) {
96 error += std::to_string(bzlib_error);
97 } else if (bzfile) {
98 error += ::BZ2_bzerror(bzfile, &errnum);
99 }
100 throw osmium::bzip2_error{error, errnum};
101 }
102
/**
 * Wrapper around a C stdio FILE stream that is created from an
 * already open file descriptor. The stream is closed in the
 * destructor unless close() has been called before.
 *
 * Objects of this class can be neither copied nor moved.
 */
class file_wrapper {

    FILE* m_file = nullptr;

public:

    /// Create a wrapper that does not hold any stream.
    file_wrapper() noexcept = default;

    /**
     * Open a stdio stream on the file descriptor.
     *
     * If fdopen() fails, the file descriptor is closed (unless it is
     * stdout) before the exception is thrown.
     *
     * @param fd File descriptor, must be >= 0.
     * @param mode Mode string handed to fdopen().
     * @throws std::system_error if fd is negative or fdopen() fails.
     */
    file_wrapper(const int fd, const char* mode) {
        if (fd < 0) {
            throw std::system_error{errno, std::system_category(), "file descriptor must be >= 0"};
        }
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        m_file = fdopen(fd, mode); // NOLINT(cppcoreguidelines-prefer-member-initializer)
        if (!m_file) {
            // Remember why fdopen() failed before calling close(),
            // because close() is allowed to overwrite errno.
            const int fdopen_errno = errno;

            // Do not close stdout
            if (fd != 1) {
                ::close(fd);
            }
            throw std::system_error{fdopen_errno, std::system_category(), "fdopen failed"};
        }
    }

    file_wrapper(const file_wrapper&) = delete;
    file_wrapper& operator=(const file_wrapper&) = delete;

    file_wrapper(file_wrapper&&) = delete;
    file_wrapper& operator=(file_wrapper&&) = delete;

    /// Closes the stream if it is still open. Errors are ignored.
    ~file_wrapper() noexcept {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            (void)fclose(m_file);
        }
    }

    /// The wrapped stream, nullptr if there is none.
    FILE* file() const noexcept {
        return m_file;
    }

    /**
     * Close the stream. Does nothing if there is no open stream.
     * A stream on stdout (file descriptor 1) is released by the
     * wrapper but not closed.
     *
     * @throws std::system_error if fclose() fails.
     */
    void close() {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            // Forget the stream first so that the destructor will not
            // try to close it again, whatever happens below.
            FILE* wrapped_file = m_file;
            m_file = nullptr;

            // Do not close stdout
            if (fileno(wrapped_file) == 1) {
                return; // NOLINT(clang-analyzer-unix.Stream)
            }

            if (fclose(wrapped_file) != 0) {
                throw std::system_error{errno, std::system_category(), "fclose failed"};
            }
        }
    }

}; // class file_wrapper
168
169 } // namespace detail
170
171 class Bzip2Compressor final : public Compressor {
172
173 std::size_t m_file_size = 0;
174 detail::file_wrapper m_file;
175 BZFILE* m_bzfile = nullptr;
176
177 public:
178
179 explicit Bzip2Compressor(const int fd, const fsync sync) :
180 Compressor(sync),
181 m_file(fd, "wb") {
182#ifdef _MSC_VER
183 osmium::detail::disable_invalid_parameter_handler diph;
184#endif
185 int bzerror = BZ_OK;
186 m_bzfile = ::BZ2_bzWriteOpen(&bzerror, m_file.file(), 6, 0, 0);
187 if (!m_bzfile) {
188 throw bzip2_error{"bzip2 error: write open failed", bzerror};
189 }
190 }
191
194
197
198 ~Bzip2Compressor() noexcept override {
199 try {
200 close();
201 } catch (...) { // NOLINT(bugprone-empty-catch)
202 // Ignore any exceptions because destructor must not throw.
203 }
204 }
205
206 void write(const std::string& data) override {
207 assert(data.size() < std::numeric_limits<int>::max());
208 assert(m_bzfile);
209#ifdef _MSC_VER
210 osmium::detail::disable_invalid_parameter_handler diph;
211#endif
212 int bzerror = BZ_OK;
213 ::BZ2_bzWrite(&bzerror, m_bzfile, const_cast<char*>(data.data()), static_cast<int>(data.size()));
214 if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
215 detail::throw_bzip2_error(m_bzfile, "write failed", bzerror);
216 }
217 }
218
219 void close() override {
220 if (m_bzfile) {
221#ifdef _MSC_VER
222 osmium::detail::disable_invalid_parameter_handler diph;
223#endif
224 int bzerror = BZ_OK;
225 unsigned int nbytes_out_lo32 = 0;
226 unsigned int nbytes_out_hi32 = 0;
227 ::BZ2_bzWriteClose64(&bzerror, m_bzfile, 0, nullptr, nullptr, &nbytes_out_lo32, &nbytes_out_hi32);
228 m_bzfile = nullptr;
229 if (do_fsync() && m_file.file()) {
230 osmium::io::detail::reliable_fsync(fileno(m_file.file()));
231 }
232 m_file.close();
233 if (bzerror != BZ_OK) {
234 throw bzip2_error{"bzip2 error: write close failed", bzerror};
235 }
236 m_file_size = static_cast<std::size_t>(static_cast<uint64_t>(nbytes_out_hi32) << 32U | nbytes_out_lo32);
237 }
238 }
239
240 std::size_t file_size() const override {
241 return m_file_size;
242 }
243
244 }; // class Bzip2Compressor
245
246 class Bzip2Decompressor final : public Decompressor {
247
248 detail::file_wrapper m_file;
249 BZFILE* m_bzfile = nullptr;
250 bool m_stream_end = false;
251
252 public:
253
254 explicit Bzip2Decompressor(const int fd) :
255 m_file(fd, "rb") {
256#ifdef _MSC_VER
257 osmium::detail::disable_invalid_parameter_handler diph;
258#endif
259 int bzerror = BZ_OK;
260 m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, nullptr, 0);
261 if (!m_bzfile) {
262 throw bzip2_error{"bzip2 error: read open failed", bzerror};
263 }
264 }
265
268
271
272 ~Bzip2Decompressor() noexcept override {
273 try {
274 close();
275 } catch (...) { // NOLINT(bugprone-empty-catch)
276 // Ignore any exceptions because destructor must not throw.
277 }
278 }
279
280 std::string read() override {
281 const auto offset = ftell(m_file.file());
282 if (offset > 0 && want_buffered_pages_removed()) {
283 osmium::io::detail::remove_buffered_pages(fileno(m_file.file()), static_cast<std::size_t>(offset));
284 }
285#ifdef _MSC_VER
286 osmium::detail::disable_invalid_parameter_handler diph;
287#endif
288 assert(m_bzfile);
289 std::string buffer;
290
291 if (!m_stream_end) {
293 int bzerror = BZ_OK;
294 assert(buffer.size() < std::numeric_limits<int>::max());
295 const int nread = ::BZ2_bzRead(&bzerror, m_bzfile, &*buffer.begin(), static_cast<int>(buffer.size()));
296 if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
297 detail::throw_bzip2_error(m_bzfile, "read failed", bzerror);
298 }
299 if (bzerror == BZ_STREAM_END) {
300 if (!feof(m_file.file())) {
301 void* unused = nullptr;
302 int num_unused = 0;
303 ::BZ2_bzReadGetUnused(&bzerror, m_bzfile, &unused, &num_unused);
304 if (bzerror != BZ_OK) {
305 detail::throw_bzip2_error(m_bzfile, "get unused failed", bzerror);
306 }
307 if (num_unused != 0) {
308 std::string unused_data{static_cast<const char*>(unused), static_cast<std::string::size_type>(num_unused)};
309 ::BZ2_bzReadClose(&bzerror, m_bzfile);
310 if (bzerror != BZ_OK) {
311 throw bzip2_error{"bzip2 error: read close failed", bzerror};
312 }
313 assert(unused_data.size() < std::numeric_limits<int>::max());
314 m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, &*unused_data.begin(), static_cast<int>(unused_data.size()));
315 if (!m_bzfile) {
316 throw bzip2_error{"bzip2 error: read open failed", bzerror};
317 }
318 } else {
319 // Close current stream and try to open a new one for multi-stream files
320 ::BZ2_bzReadClose(&bzerror, m_bzfile);
321 if (bzerror != BZ_OK) {
322 throw bzip2_error{"bzip2 error: read close failed", bzerror};
323 }
324 // Try to open a new stream - there might be more bzip2 streams concatenated
325 m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, nullptr, 0);
326 if (!m_bzfile || bzerror != BZ_OK) {
327 // If we can't open a new stream, we've truly reached the end
328 m_stream_end = true;
329 m_bzfile = nullptr;
330 }
331 }
332 } else {
333 m_stream_end = true;
334 }
335 }
336 buffer.resize(static_cast<std::string::size_type>(nread));
337 }
338
339 set_offset(static_cast<std::size_t>(ftell(m_file.file())));
340
341 return buffer;
342 }
343
344 void close() override {
345 if (m_bzfile) {
347 osmium::io::detail::remove_buffered_pages(fileno(m_file.file()));
348 }
349#ifdef _MSC_VER
350 osmium::detail::disable_invalid_parameter_handler diph;
351#endif
352 int bzerror = BZ_OK;
353 ::BZ2_bzReadClose(&bzerror, m_bzfile);
354 m_bzfile = nullptr;
355 m_file.close();
356 if (bzerror != BZ_OK) {
357 throw bzip2_error{"bzip2 error: read close failed", bzerror};
358 }
359 }
360 }
361
362 }; // class Bzip2Decompressor
363
365
366 const char* m_buffer;
367 std::size_t m_buffer_size;
368 bz_stream m_bzstream;
369
370 public:
371
372 Bzip2BufferDecompressor(const char* buffer, const std::size_t size) :
373 m_buffer(buffer),
374 m_buffer_size(size),
375 m_bzstream() {
376 m_bzstream.next_in = const_cast<char*>(buffer);
377 assert(size < std::numeric_limits<unsigned int>::max());
378 m_bzstream.avail_in = static_cast<unsigned int>(size);
379 const int result = BZ2_bzDecompressInit(&m_bzstream, 0, 0);
380 if (result != BZ_OK) {
381 throw bzip2_error{"bzip2 error: decompression init failed: ", result};
382 }
383 }
384
387
390
391 ~Bzip2BufferDecompressor() noexcept override {
392 try {
393 close();
394 } catch (...) { // NOLINT(bugprone-empty-catch)
395 // Ignore any exceptions because destructor must not throw.
396 }
397 }
398
399 std::string read() override {
400 std::string output;
401
402 if (m_buffer) {
403 const std::size_t buffer_size = 10240;
404 output.resize(buffer_size);
405 m_bzstream.next_out = &*output.begin();
406 m_bzstream.avail_out = buffer_size;
407 const int result = BZ2_bzDecompress(&m_bzstream);
408
409 if (result != BZ_OK) {
410 m_buffer = nullptr;
411 m_buffer_size = 0;
412 }
413
414 if (result != BZ_OK && result != BZ_STREAM_END) {
415 throw bzip2_error{"bzip2 error: decompress failed: ", result};
416 }
417
418 output.resize(static_cast<std::size_t>(m_bzstream.next_out - output.data()));
419 }
420
421 return output;
422 }
423
424 void close() override {
425 BZ2_bzDecompressEnd(&m_bzstream);
426 }
427
428 }; // class Bzip2BufferDecompressor
429
430 namespace detail {
431
432 // we want the register_compression() function to run, setting
433 // the variable is only a side-effect, it will never be used
435 [](const int fd, const fsync sync) { return new osmium::io::Bzip2Compressor{fd, sync}; },
436 [](const int fd) { return new osmium::io::Bzip2Decompressor{fd}; },
437 [](const char* buffer, const std::size_t size) { return new osmium::io::Bzip2BufferDecompressor{buffer, size}; }
438 );
439
440 // dummy function to silence the unused variable warning from above
441 inline bool get_registered_bzip2_compression() noexcept {
442 return registered_bzip2_compression;
443 }
444
445 } // namespace detail
446
447 } // namespace io
448
449} // namespace osmium
450
451#endif // OSMIUM_IO_BZIP2_COMPRESSION_HPP
Definition bzip2_compression.hpp:364
Bzip2BufferDecompressor(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(const char *buffer, const std::size_t size)
Definition bzip2_compression.hpp:372
Bzip2BufferDecompressor & operator=(Bzip2BufferDecompressor &&)=delete
Bzip2BufferDecompressor & operator=(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(Bzip2BufferDecompressor &&)=delete
std::string read() override
Definition bzip2_compression.hpp:399
~Bzip2BufferDecompressor() noexcept override
Definition bzip2_compression.hpp:391
void close() override
Definition bzip2_compression.hpp:424
bz_stream m_bzstream
Definition bzip2_compression.hpp:368
std::size_t m_buffer_size
Definition bzip2_compression.hpp:367
const char * m_buffer
Definition bzip2_compression.hpp:366
Definition bzip2_compression.hpp:171
BZFILE * m_bzfile
Definition bzip2_compression.hpp:175
Bzip2Compressor(const Bzip2Compressor &)=delete
Bzip2Compressor(Bzip2Compressor &&)=delete
Bzip2Compressor & operator=(const Bzip2Compressor &)=delete
~Bzip2Compressor() noexcept override
Definition bzip2_compression.hpp:198
std::size_t file_size() const override
Definition bzip2_compression.hpp:240
Bzip2Compressor(const int fd, const fsync sync)
Definition bzip2_compression.hpp:179
Bzip2Compressor & operator=(Bzip2Compressor &&)=delete
std::size_t m_file_size
Definition bzip2_compression.hpp:173
void close() override
Definition bzip2_compression.hpp:219
void write(const std::string &data) override
Definition bzip2_compression.hpp:206
detail::file_wrapper m_file
Definition bzip2_compression.hpp:174
Definition bzip2_compression.hpp:246
~Bzip2Decompressor() noexcept override
Definition bzip2_compression.hpp:272
BZFILE * m_bzfile
Definition bzip2_compression.hpp:249
Bzip2Decompressor & operator=(const Bzip2Decompressor &)=delete
detail::file_wrapper m_file
Definition bzip2_compression.hpp:248
Bzip2Decompressor & operator=(Bzip2Decompressor &&)=delete
bool m_stream_end
Definition bzip2_compression.hpp:250
Bzip2Decompressor(Bzip2Decompressor &&)=delete
void close() override
Definition bzip2_compression.hpp:344
Bzip2Decompressor(const int fd)
Definition bzip2_compression.hpp:254
Bzip2Decompressor(const Bzip2Decompressor &)=delete
std::string read() override
Definition bzip2_compression.hpp:280
bool register_compression(osmium::io::file_compression compression, const create_compressor_type &create_compressor, const create_decompressor_type_fd &create_decompressor_fd, const create_decompressor_type_buffer &create_decompressor_buffer)
Definition compression.hpp:196
static CompressionFactory & instance()
Definition compression.hpp:191
Definition compression.hpp:57
bool do_fsync() const noexcept
Definition compression.hpp:63
Definition compression.hpp:91
@ input_buffer_size
Definition compression.hpp:100
void set_offset(const std::size_t offset) noexcept
Definition compression.hpp:125
bool want_buffered_pages_removed() const noexcept
Definition compression.hpp:131
Definition attr.hpp:342
fsync
Definition writer_options.hpp:51
Namespace for everything in the Osmium library.
Definition assembler.hpp:53
Definition bzip2_compression.hpp:71
bzip2_error(const std::string &what, const int error_code)
Definition bzip2_compression.hpp:76
int bzip2_error_code
Definition bzip2_compression.hpp:73
int system_errno
Definition bzip2_compression.hpp:74
Definition error.hpp:46