Libosmium  2.22.0
Fast and flexible C++ library for working with OpenStreetMap data
bzip2_compression.hpp
Go to the documentation of this file.
1#ifndef OSMIUM_IO_BZIP2_COMPRESSION_HPP
2#define OSMIUM_IO_BZIP2_COMPRESSION_HPP
3
4/*
5
6This file is part of Osmium (https://osmcode.org/libosmium).
7
8Copyright 2013-2025 Jochen Topf <jochen@topf.org> and others (see README).
9
10Boost Software License - Version 1.0 - August 17th, 2003
11
12Permission is hereby granted, free of charge, to any person or organization
13obtaining a copy of the software and accompanying documentation covered by
14this license (the "Software") to use, reproduce, display, distribute,
15execute, and transmit the Software, and to prepare derivative works of the
16Software, and to permit third-parties to whom the Software is furnished to
17do so, all subject to the following:
18
19The copyright notices in the Software and this entire statement, including
20the above license grant, this restriction and the following disclaimer,
21must be included in all copies of the Software, in whole or in part, and
22all derivative works of the Software, unless such copies or derivative
23works are solely in the form of machine-executable object code generated by
24a source language processor.
25
26THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
29SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
30FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
31ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32DEALINGS IN THE SOFTWARE.
33
34*/
35
46#include <osmium/io/detail/read_write.hpp>
47#include <osmium/io/error.hpp>
50#include <osmium/util/file.hpp>
51
52#include <bzlib.h>
53
54#include <cassert>
55#include <cerrno>
56#include <cstdio>
57#include <limits>
58#include <string>
59#include <system_error>
60
61#ifndef _MSC_VER
62# include <unistd.h>
63#endif
64
65namespace osmium {
66
71 struct bzip2_error : public io_error {
72
74 int system_errno = 0;
75
76 bzip2_error(const std::string& what, const int error_code) :
77 io_error(what),
78 bzip2_error_code(error_code) {
79 if (error_code == BZ_IO_ERROR) {
80 system_errno = errno;
81 }
82 }
83
84 }; // struct bzip2_error
85
86 namespace io {
87
88 namespace detail {
89
90 [[noreturn]] inline void throw_bzip2_error(BZFILE* bzfile, const char* msg, const int bzlib_error) {
91 std::string error{"bzip2 error: "};
92 error += msg;
93 error += ": ";
94 int errnum = bzlib_error;
95 if (bzlib_error) {
96 error += std::to_string(bzlib_error);
97 } else if (bzfile) {
98 error += ::BZ2_bzerror(bzfile, &errnum);
99 }
100 throw osmium::bzip2_error{error, errnum};
101 }
102
/**
 * Owner of a C stdio stream that was created with fdopen() from an
 * already open file descriptor. The class can be neither copied nor
 * moved. The destructor closes a stream that is still open and
 * ignores the result; call close() to have errors reported.
 */
class file_wrapper {

    FILE* m_file = nullptr;

public:

    /// Create a wrapper without a stream; file() returns nullptr.
    file_wrapper() noexcept = default;

    /**
     * Open a stream on the file descriptor.
     *
     * @param fd Open file descriptor.
     * @param mode Mode string passed on to fdopen().
     * @throws std::system_error carrying the errno set by fdopen()
     *         if the stream can not be created. In that case fd is
     *         closed, unless it is stdout (fd 1).
     */
    file_wrapper(const int fd, const char* mode) {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        m_file = fdopen(fd, mode); // NOLINT(cppcoreguidelines-prefer-member-initializer)
        if (!m_file) {
            // Keep the reason for the fdopen() failure. The ::close()
            // call below may overwrite errno, which would make the
            // exception report the wrong error.
            const int fdopen_errno = errno;

            // Do not close stdout
            if (fd != 1) {
                ::close(fd);
            }
            throw std::system_error{fdopen_errno, std::system_category(), "fdopen failed"};
        }
    }

    file_wrapper(const file_wrapper&) = delete;
    file_wrapper& operator=(const file_wrapper&) = delete;

    file_wrapper(file_wrapper&&) = delete;
    file_wrapper& operator=(file_wrapper&&) = delete;

    ~file_wrapper() noexcept {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            // Result deliberately ignored, destructors must not throw.
            (void)fclose(m_file);
        }
    }

    /// The wrapped stream, or nullptr if there is none (any more).
    FILE* file() const noexcept {
        return m_file;
    }

    /**
     * Close the stream. Does nothing if there is no open stream.
     * After this call file() returns nullptr, even if closing failed.
     *
     * @throws std::system_error if fclose() reports an error.
     */
    void close() {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            // Forget the stream first, so the destructor will not try
            // to close it a second time if fclose() fails below.
            FILE* wrapped_file = m_file;
            m_file = nullptr;

            // Do not close stdout
            if (fileno(wrapped_file) == 1) {
                return;
            }

            if (fclose(wrapped_file) != 0) {
                throw std::system_error{errno, std::system_category(), "fclose failed"};
            }
        }
    }

}; // class file_wrapper
165
166 } // namespace detail
167
168 class Bzip2Compressor final : public Compressor {
169
170 std::size_t m_file_size = 0;
171 detail::file_wrapper m_file;
172 BZFILE* m_bzfile = nullptr;
173
174 public:
175
176 explicit Bzip2Compressor(const int fd, const fsync sync) :
177 Compressor(sync),
178 m_file(fd, "wb") {
179#ifdef _MSC_VER
180 osmium::detail::disable_invalid_parameter_handler diph;
181#endif
182 int bzerror = BZ_OK;
183 m_bzfile = ::BZ2_bzWriteOpen(&bzerror, m_file.file(), 6, 0, 0);
184 if (!m_bzfile) {
185 throw bzip2_error{"bzip2 error: write open failed", bzerror};
186 }
187 }
188
191
194
195 ~Bzip2Compressor() noexcept override {
196 try {
197 close();
198 } catch (...) { // NOLINT(bugprone-empty-catch)
199 // Ignore any exceptions because destructor must not throw.
200 }
201 }
202
203 void write(const std::string& data) override {
204 assert(data.size() < std::numeric_limits<int>::max());
205 assert(m_bzfile);
206#ifdef _MSC_VER
207 osmium::detail::disable_invalid_parameter_handler diph;
208#endif
209 int bzerror = BZ_OK;
210 ::BZ2_bzWrite(&bzerror, m_bzfile, const_cast<char*>(data.data()), static_cast<int>(data.size()));
211 if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
212 detail::throw_bzip2_error(m_bzfile, "write failed", bzerror);
213 }
214 }
215
216 void close() override {
217 if (m_bzfile) {
218#ifdef _MSC_VER
219 osmium::detail::disable_invalid_parameter_handler diph;
220#endif
221 int bzerror = BZ_OK;
222 unsigned int nbytes_out_lo32 = 0;
223 unsigned int nbytes_out_hi32 = 0;
224 ::BZ2_bzWriteClose64(&bzerror, m_bzfile, 0, nullptr, nullptr, &nbytes_out_lo32, &nbytes_out_hi32);
225 m_bzfile = nullptr;
226 if (do_fsync() && m_file.file()) {
227 osmium::io::detail::reliable_fsync(fileno(m_file.file()));
228 }
229 m_file.close();
230 if (bzerror != BZ_OK) {
231 throw bzip2_error{"bzip2 error: write close failed", bzerror};
232 }
233 m_file_size = static_cast<std::size_t>(static_cast<uint64_t>(nbytes_out_hi32) << 32U | nbytes_out_lo32);
234 }
235 }
236
237 std::size_t file_size() const override {
238 return m_file_size;
239 }
240
241 }; // class Bzip2Compressor
242
243 class Bzip2Decompressor final : public Decompressor {
244
245 detail::file_wrapper m_file;
246 BZFILE* m_bzfile = nullptr;
247 bool m_stream_end = false;
248
249 public:
250
251 explicit Bzip2Decompressor(const int fd) :
252 m_file(fd, "rb") {
253#ifdef _MSC_VER
254 osmium::detail::disable_invalid_parameter_handler diph;
255#endif
256 int bzerror = BZ_OK;
257 m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, nullptr, 0);
258 if (!m_bzfile) {
259 throw bzip2_error{"bzip2 error: read open failed", bzerror};
260 }
261 }
262
265
268
269 ~Bzip2Decompressor() noexcept override {
270 try {
271 close();
272 } catch (...) { // NOLINT(bugprone-empty-catch)
273 // Ignore any exceptions because destructor must not throw.
274 }
275 }
276
277 std::string read() override {
278 const auto offset = ftell(m_file.file());
279 if (offset > 0 && want_buffered_pages_removed()) {
280 osmium::io::detail::remove_buffered_pages(fileno(m_file.file()), static_cast<std::size_t>(offset));
281 }
282#ifdef _MSC_VER
283 osmium::detail::disable_invalid_parameter_handler diph;
284#endif
285 assert(m_bzfile);
286 std::string buffer;
287
288 if (!m_stream_end) {
290 int bzerror = BZ_OK;
291 assert(buffer.size() < std::numeric_limits<int>::max());
292 const int nread = ::BZ2_bzRead(&bzerror, m_bzfile, &*buffer.begin(), static_cast<int>(buffer.size()));
293 if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
294 detail::throw_bzip2_error(m_bzfile, "read failed", bzerror);
295 }
296 if (bzerror == BZ_STREAM_END) {
297 if (!feof(m_file.file())) {
298 void* unused = nullptr;
299 int num_unused = 0;
300 ::BZ2_bzReadGetUnused(&bzerror, m_bzfile, &unused, &num_unused);
301 if (bzerror != BZ_OK) {
302 detail::throw_bzip2_error(m_bzfile, "get unused failed", bzerror);
303 }
304 if (num_unused != 0) {
305 std::string unused_data{static_cast<const char*>(unused), static_cast<std::string::size_type>(num_unused)};
306 ::BZ2_bzReadClose(&bzerror, m_bzfile);
307 if (bzerror != BZ_OK) {
308 throw bzip2_error{"bzip2 error: read close failed", bzerror};
309 }
310 assert(unused_data.size() < std::numeric_limits<int>::max());
311 m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, &*unused_data.begin(), static_cast<int>(unused_data.size()));
312 if (!m_bzfile) {
313 throw bzip2_error{"bzip2 error: read open failed", bzerror};
314 }
315 } else {
316 m_stream_end = true;
317 }
318 } else {
319 m_stream_end = true;
320 }
321 }
322 buffer.resize(static_cast<std::string::size_type>(nread));
323 }
324
325 set_offset(static_cast<std::size_t>(ftell(m_file.file())));
326
327 return buffer;
328 }
329
330 void close() override {
331 if (m_bzfile) {
333 osmium::io::detail::remove_buffered_pages(fileno(m_file.file()));
334 }
335#ifdef _MSC_VER
336 osmium::detail::disable_invalid_parameter_handler diph;
337#endif
338 int bzerror = BZ_OK;
339 ::BZ2_bzReadClose(&bzerror, m_bzfile);
340 m_bzfile = nullptr;
341 m_file.close();
342 if (bzerror != BZ_OK) {
343 throw bzip2_error{"bzip2 error: read close failed", bzerror};
344 }
345 }
346 }
347
348 }; // class Bzip2Decompressor
349
351
352 const char* m_buffer;
353 std::size_t m_buffer_size;
354 bz_stream m_bzstream;
355
356 public:
357
358 Bzip2BufferDecompressor(const char* buffer, const std::size_t size) :
359 m_buffer(buffer),
360 m_buffer_size(size),
361 m_bzstream() {
362 m_bzstream.next_in = const_cast<char*>(buffer);
363 assert(size < std::numeric_limits<unsigned int>::max());
364 m_bzstream.avail_in = static_cast<unsigned int>(size);
365 const int result = BZ2_bzDecompressInit(&m_bzstream, 0, 0);
366 if (result != BZ_OK) {
367 throw bzip2_error{"bzip2 error: decompression init failed: ", result};
368 }
369 }
370
373
376
377 ~Bzip2BufferDecompressor() noexcept override {
378 try {
379 close();
380 } catch (...) { // NOLINT(bugprone-empty-catch)
381 // Ignore any exceptions because destructor must not throw.
382 }
383 }
384
385 std::string read() override {
386 std::string output;
387
388 if (m_buffer) {
389 const std::size_t buffer_size = 10240;
390 output.resize(buffer_size);
391 m_bzstream.next_out = &*output.begin();
392 m_bzstream.avail_out = buffer_size;
393 const int result = BZ2_bzDecompress(&m_bzstream);
394
395 if (result != BZ_OK) {
396 m_buffer = nullptr;
397 m_buffer_size = 0;
398 }
399
400 if (result != BZ_OK && result != BZ_STREAM_END) {
401 throw bzip2_error{"bzip2 error: decompress failed: ", result};
402 }
403
404 output.resize(static_cast<std::size_t>(m_bzstream.next_out - output.data()));
405 }
406
407 return output;
408 }
409
410 void close() override {
411 BZ2_bzDecompressEnd(&m_bzstream);
412 }
413
414 }; // class Bzip2BufferDecompressor
415
416 namespace detail {
417
418 // we want the register_compression() function to run, setting
419 // the variable is only a side-effect, it will never be used
421 [](const int fd, const fsync sync) { return new osmium::io::Bzip2Compressor{fd, sync}; },
422 [](const int fd) { return new osmium::io::Bzip2Decompressor{fd}; },
423 [](const char* buffer, const std::size_t size) { return new osmium::io::Bzip2BufferDecompressor{buffer, size}; }
424 );
425
426 // dummy function to silence the unused variable warning from above
427 inline bool get_registered_bzip2_compression() noexcept {
428 return registered_bzip2_compression;
429 }
430
431 } // namespace detail
432
433 } // namespace io
434
435} // namespace osmium
436
437#endif // OSMIUM_IO_BZIP2_COMPRESSION_HPP
Definition: bzip2_compression.hpp:350
Bzip2BufferDecompressor(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(const char *buffer, const std::size_t size)
Definition: bzip2_compression.hpp:358
Bzip2BufferDecompressor & operator=(Bzip2BufferDecompressor &&)=delete
Bzip2BufferDecompressor & operator=(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(Bzip2BufferDecompressor &&)=delete
std::string read() override
Definition: bzip2_compression.hpp:385
~Bzip2BufferDecompressor() noexcept override
Definition: bzip2_compression.hpp:377
void close() override
Definition: bzip2_compression.hpp:410
bz_stream m_bzstream
Definition: bzip2_compression.hpp:354
std::size_t m_buffer_size
Definition: bzip2_compression.hpp:353
const char * m_buffer
Definition: bzip2_compression.hpp:352
Definition: bzip2_compression.hpp:168
BZFILE * m_bzfile
Definition: bzip2_compression.hpp:172
Bzip2Compressor(const Bzip2Compressor &)=delete
Bzip2Compressor(Bzip2Compressor &&)=delete
Bzip2Compressor & operator=(const Bzip2Compressor &)=delete
~Bzip2Compressor() noexcept override
Definition: bzip2_compression.hpp:195
std::size_t file_size() const override
Definition: bzip2_compression.hpp:237
Bzip2Compressor(const int fd, const fsync sync)
Definition: bzip2_compression.hpp:176
Bzip2Compressor & operator=(Bzip2Compressor &&)=delete
std::size_t m_file_size
Definition: bzip2_compression.hpp:170
void close() override
Definition: bzip2_compression.hpp:216
void write(const std::string &data) override
Definition: bzip2_compression.hpp:203
detail::file_wrapper m_file
Definition: bzip2_compression.hpp:171
Definition: bzip2_compression.hpp:243
~Bzip2Decompressor() noexcept override
Definition: bzip2_compression.hpp:269
BZFILE * m_bzfile
Definition: bzip2_compression.hpp:246
Bzip2Decompressor & operator=(const Bzip2Decompressor &)=delete
detail::file_wrapper m_file
Definition: bzip2_compression.hpp:245
Bzip2Decompressor & operator=(Bzip2Decompressor &&)=delete
bool m_stream_end
Definition: bzip2_compression.hpp:247
Bzip2Decompressor(Bzip2Decompressor &&)=delete
void close() override
Definition: bzip2_compression.hpp:330
Bzip2Decompressor(const int fd)
Definition: bzip2_compression.hpp:251
Bzip2Decompressor(const Bzip2Decompressor &)=delete
std::string read() override
Definition: bzip2_compression.hpp:277
bool register_compression(osmium::io::file_compression compression, const create_compressor_type &create_compressor, const create_decompressor_type_fd &create_decompressor_fd, const create_decompressor_type_buffer &create_decompressor_buffer)
Definition: compression.hpp:196
static CompressionFactory & instance()
Definition: compression.hpp:191
Definition: compression.hpp:57
bool do_fsync() const noexcept
Definition: compression.hpp:63
Definition: compression.hpp:91
@ input_buffer_size
Definition: compression.hpp:100
void set_offset(const std::size_t offset) noexcept
Definition: compression.hpp:125
bool want_buffered_pages_removed() const noexcept
Definition: compression.hpp:131
Definition: attr.hpp:342
fsync
Definition: writer_options.hpp:51
Namespace for everything in the Osmium library.
Definition: assembler.hpp:53
Definition: bzip2_compression.hpp:71
bzip2_error(const std::string &what, const int error_code)
Definition: bzip2_compression.hpp:76
int bzip2_error_code
Definition: bzip2_compression.hpp:73
int system_errno
Definition: bzip2_compression.hpp:74
Definition: error.hpp:46