Libosmium  2.20.0
Fast and flexible C++ library for working with OpenStreetMap data
bzip2_compression.hpp
Go to the documentation of this file.
1#ifndef OSMIUM_IO_BZIP2_COMPRESSION_HPP
2#define OSMIUM_IO_BZIP2_COMPRESSION_HPP
3
4/*
5
6This file is part of Osmium (https://osmcode.org/libosmium).
7
8Copyright 2013-2023 Jochen Topf <jochen@topf.org> and others (see README).
9
10Boost Software License - Version 1.0 - August 17th, 2003
11
12Permission is hereby granted, free of charge, to any person or organization
13obtaining a copy of the software and accompanying documentation covered by
14this license (the "Software") to use, reproduce, display, distribute,
15execute, and transmit the Software, and to prepare derivative works of the
16Software, and to permit third-parties to whom the Software is furnished to
17do so, all subject to the following:
18
19The copyright notices in the Software and this entire statement, including
20the above license grant, this restriction and the following disclaimer,
21must be included in all copies of the Software, in whole or in part, and
22all derivative works of the Software, unless such copies or derivative
23works are solely in the form of machine-executable object code generated by
24a source language processor.
25
26THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
29SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
30FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
31ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32DEALINGS IN THE SOFTWARE.
33
34*/
35
46#include <osmium/io/detail/read_write.hpp>
47#include <osmium/io/error.hpp>
50#include <osmium/util/file.hpp>
51
52#include <bzlib.h>
53
54#include <cassert>
55#include <cerrno>
56#include <cstdio>
57#include <limits>
58#include <string>
59#include <system_error>
60
61#ifndef _MSC_VER
62# include <unistd.h>
63#endif
64
65namespace osmium {
66
71 struct bzip2_error : public io_error {
72
74 int system_errno = 0;
75
76 bzip2_error(const std::string& what, const int error_code) :
77 io_error(what),
78 bzip2_error_code(error_code) {
79 if (error_code == BZ_IO_ERROR) {
80 system_errno = errno;
81 }
82 }
83
84 }; // struct bzip2_error
85
86 namespace io {
87
88 namespace detail {
89
90 [[noreturn]] inline void throw_bzip2_error(BZFILE* bzfile, const char* msg, const int bzlib_error) {
91 std::string error{"bzip2 error: "};
92 error += msg;
93 error += ": ";
94 int errnum = bzlib_error;
95 if (bzlib_error) {
96 error += std::to_string(bzlib_error);
97 } else if (bzfile) {
98 error += ::BZ2_bzerror(bzfile, &errnum);
99 }
100 throw osmium::bzip2_error{error, errnum};
101 }
102
/**
 * Wrapper around a C stdio stream (FILE*) that is created from a file
 * descriptor. The destructor closes the stream if it is still open;
 * call close() explicitly to get an exception when closing fails.
 * The class can be neither copied nor moved.
 */
class file_wrapper {

    FILE* m_file = nullptr;

public:

    /// Create a wrapper that does not hold a stream.
    file_wrapper() noexcept = default;

    /**
     * Open a stdio stream on the given file descriptor.
     *
     * If opening fails, the file descriptor is closed (unless it is
     * stdout) and an exception is thrown.
     *
     * @param fd File descriptor to wrap.
     * @param mode Mode string handed to fdopen().
     * @throws std::system_error with the errno set by fdopen().
     */
    file_wrapper(const int fd, const char* mode) {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        m_file = fdopen(fd, mode); // NOLINT(cppcoreguidelines-prefer-member-initializer)
        if (!m_file) {
            // Remember why fdopen() failed before calling close(),
            // which is allowed to overwrite errno.
            const int fdopen_errno = errno;

            // Do not close stdout
            if (fd != 1) {
                ::close(fd);
            }
            throw std::system_error{fdopen_errno, std::system_category(), "fdopen failed"};
        }
    }

    file_wrapper(const file_wrapper&) = delete;
    file_wrapper& operator=(const file_wrapper&) = delete;

    file_wrapper(file_wrapper&&) = delete;
    file_wrapper& operator=(file_wrapper&&) = delete;

    /// Close the stream if it is still open. Errors are ignored here.
    ~file_wrapper() noexcept {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            (void)fclose(m_file);
        }
    }

    /// The wrapped stream or nullptr if there is none (any more).
    FILE* file() const noexcept {
        return m_file;
    }

    /**
     * Close the stream. Does nothing if there is no open stream. A
     * stream on stdout is released but not actually closed.
     *
     * @throws std::system_error if fclose() reports an error.
     */
    void close() {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            // Forget the stream first, so that neither a second close()
            // nor the destructor will touch it again, even if we throw.
            FILE* wrapped_file = m_file;
            m_file = nullptr;

            // Do not close stdout
            if (fileno(wrapped_file) == 1) {
                return;
            }

            if (fclose(wrapped_file) != 0) {
                throw std::system_error{errno, std::system_category(), "fclose failed"};
            }
        }
    }

}; // class file_wrapper
165
166 } // namespace detail
167
168 class Bzip2Compressor final : public Compressor {
169
170 std::size_t m_file_size = 0;
171 detail::file_wrapper m_file;
172 BZFILE* m_bzfile = nullptr;
173
174 public:
175
176 explicit Bzip2Compressor(const int fd, const fsync sync) :
177 Compressor(sync),
178 m_file(fd, "wb") {
179#ifdef _MSC_VER
180 osmium::detail::disable_invalid_parameter_handler diph;
181#endif
182 int bzerror = BZ_OK;
183 m_bzfile = ::BZ2_bzWriteOpen(&bzerror, m_file.file(), 6, 0, 0);
184 if (!m_bzfile) {
185 throw bzip2_error{"bzip2 error: write open failed", bzerror};
186 }
187 }
188
191
194
195 ~Bzip2Compressor() noexcept override {
196 try {
197 close();
198 } catch (...) {
199 // Ignore any exceptions because destructor must not throw.
200 }
201 }
202
203 void write(const std::string& data) override {
204 assert(data.size() < std::numeric_limits<int>::max());
205 assert(m_bzfile);
206#ifdef _MSC_VER
207 osmium::detail::disable_invalid_parameter_handler diph;
208#endif
209 int bzerror = BZ_OK;
210 ::BZ2_bzWrite(&bzerror, m_bzfile, const_cast<char*>(data.data()), static_cast<int>(data.size()));
211 if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
212 detail::throw_bzip2_error(m_bzfile, "write failed", bzerror);
213 }
214 }
215
216 void close() override {
217 if (m_bzfile) {
218#ifdef _MSC_VER
219 osmium::detail::disable_invalid_parameter_handler diph;
220#endif
221 int bzerror = BZ_OK;
222 unsigned int nbytes_out_lo32 = 0;
223 unsigned int nbytes_out_hi32 = 0;
224 ::BZ2_bzWriteClose64(&bzerror, m_bzfile, 0, nullptr, nullptr, &nbytes_out_lo32, &nbytes_out_hi32);
225 m_bzfile = nullptr;
226 if (do_fsync() && m_file.file()) {
227 osmium::io::detail::reliable_fsync(fileno(m_file.file()));
228 }
229 m_file.close();
230 if (bzerror != BZ_OK) {
231 throw bzip2_error{"bzip2 error: write close failed", bzerror};
232 }
233 m_file_size = static_cast<std::size_t>(static_cast<uint64_t>(nbytes_out_hi32) << 32U | nbytes_out_lo32);
234 }
235 }
236
237 std::size_t file_size() const override {
238 return m_file_size;
239 }
240
241 }; // class Bzip2Compressor
242
243 class Bzip2Decompressor final : public Decompressor {
244
245 detail::file_wrapper m_file;
246 BZFILE* m_bzfile = nullptr;
247 bool m_stream_end = false;
248
249 public:
250
251 explicit Bzip2Decompressor(const int fd) :
252 m_file(fd, "rb") {
253#ifdef _MSC_VER
254 osmium::detail::disable_invalid_parameter_handler diph;
255#endif
256 int bzerror = BZ_OK;
257 m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, nullptr, 0);
258 if (!m_bzfile) {
259 throw bzip2_error{"bzip2 error: read open failed", bzerror};
260 }
261 }
262
265
268
269 ~Bzip2Decompressor() noexcept override {
270 try {
271 close();
272 } catch (...) {
273 // Ignore any exceptions because destructor must not throw.
274 }
275 }
276
277 std::string read() override {
278 const auto offset = ftell(m_file.file());
279 if (offset > 0 && want_buffered_pages_removed()) {
280 osmium::io::detail::remove_buffered_pages(fileno(m_file.file()), static_cast<std::size_t>(offset));
281 }
282#ifdef _MSC_VER
283 osmium::detail::disable_invalid_parameter_handler diph;
284#endif
285 assert(m_bzfile);
286 std::string buffer;
287
288 if (!m_stream_end) {
290 int bzerror = BZ_OK;
291 assert(buffer.size() < std::numeric_limits<int>::max());
292 const int nread = ::BZ2_bzRead(&bzerror, m_bzfile, &*buffer.begin(), static_cast<int>(buffer.size()));
293 if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
294 detail::throw_bzip2_error(m_bzfile, "read failed", bzerror);
295 }
296 if (bzerror == BZ_STREAM_END) {
297 void* unused = nullptr;
298 int nunused = 0;
299 if (!feof(m_file.file())) {
300 ::BZ2_bzReadGetUnused(&bzerror, m_bzfile, &unused, &nunused);
301 if (bzerror != BZ_OK) {
302 detail::throw_bzip2_error(m_bzfile, "get unused failed", bzerror);
303 }
304 std::string unused_data{static_cast<const char*>(unused), static_cast<std::string::size_type>(nunused)};
305 ::BZ2_bzReadClose(&bzerror, m_bzfile);
306 if (bzerror != BZ_OK) {
307 throw bzip2_error{"bzip2 error: read close failed", bzerror};
308 }
309 assert(unused_data.size() < std::numeric_limits<int>::max());
310 m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, &*unused_data.begin(), static_cast<int>(unused_data.size()));
311 if (!m_bzfile) {
312 throw bzip2_error{"bzip2 error: read open failed", bzerror};
313 }
314 } else {
315 m_stream_end = true;
316 }
317 }
318 buffer.resize(static_cast<std::string::size_type>(nread));
319 }
320
321 set_offset(static_cast<std::size_t>(ftell(m_file.file())));
322
323 return buffer;
324 }
325
326 void close() override {
327 if (m_bzfile) {
329 osmium::io::detail::remove_buffered_pages(fileno(m_file.file()));
330 }
331#ifdef _MSC_VER
332 osmium::detail::disable_invalid_parameter_handler diph;
333#endif
334 int bzerror = BZ_OK;
335 ::BZ2_bzReadClose(&bzerror, m_bzfile);
336 m_bzfile = nullptr;
337 m_file.close();
338 if (bzerror != BZ_OK) {
339 throw bzip2_error{"bzip2 error: read close failed", bzerror};
340 }
341 }
342 }
343
344 }; // class Bzip2Decompressor
345
347
348 const char* m_buffer;
349 std::size_t m_buffer_size;
350 bz_stream m_bzstream;
351
352 public:
353
354 Bzip2BufferDecompressor(const char* buffer, const std::size_t size) :
355 m_buffer(buffer),
356 m_buffer_size(size),
357 m_bzstream() {
358 m_bzstream.next_in = const_cast<char*>(buffer);
359 assert(size < std::numeric_limits<unsigned int>::max());
360 m_bzstream.avail_in = static_cast<unsigned int>(size);
361 const int result = BZ2_bzDecompressInit(&m_bzstream, 0, 0);
362 if (result != BZ_OK) {
363 throw bzip2_error{"bzip2 error: decompression init failed: ", result};
364 }
365 }
366
369
372
373 ~Bzip2BufferDecompressor() noexcept override {
374 try {
375 close();
376 } catch (...) {
377 // Ignore any exceptions because destructor must not throw.
378 }
379 }
380
381 std::string read() override {
382 std::string output;
383
384 if (m_buffer) {
385 const std::size_t buffer_size = 10240;
386 output.resize(buffer_size);
387 m_bzstream.next_out = &*output.begin();
388 m_bzstream.avail_out = buffer_size;
389 const int result = BZ2_bzDecompress(&m_bzstream);
390
391 if (result != BZ_OK) {
392 m_buffer = nullptr;
393 m_buffer_size = 0;
394 }
395
396 if (result != BZ_OK && result != BZ_STREAM_END) {
397 throw bzip2_error{"bzip2 error: decompress failed: ", result};
398 }
399
400 output.resize(static_cast<std::size_t>(m_bzstream.next_out - output.data()));
401 }
402
403 return output;
404 }
405
406 void close() override {
407 BZ2_bzDecompressEnd(&m_bzstream);
408 }
409
410 }; // class Bzip2BufferDecompressor
411
412 namespace detail {
413
414 // we want the register_compression() function to run, setting
415 // the variable is only a side-effect, it will never be used
417 [](const int fd, const fsync sync) { return new osmium::io::Bzip2Compressor{fd, sync}; },
418 [](const int fd) { return new osmium::io::Bzip2Decompressor{fd}; },
419 [](const char* buffer, const std::size_t size) { return new osmium::io::Bzip2BufferDecompressor{buffer, size}; }
420 );
421
422 // dummy function to silence the unused variable warning from above
423 inline bool get_registered_bzip2_compression() noexcept {
424 return registered_bzip2_compression;
425 }
426
427 } // namespace detail
428
429 } // namespace io
430
431} // namespace osmium
432
433#endif // OSMIUM_IO_BZIP2_COMPRESSION_HPP
Definition: bzip2_compression.hpp:346
Bzip2BufferDecompressor(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(const char *buffer, const std::size_t size)
Definition: bzip2_compression.hpp:354
Bzip2BufferDecompressor & operator=(Bzip2BufferDecompressor &&)=delete
Bzip2BufferDecompressor & operator=(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(Bzip2BufferDecompressor &&)=delete
std::string read() override
Definition: bzip2_compression.hpp:381
~Bzip2BufferDecompressor() noexcept override
Definition: bzip2_compression.hpp:373
void close() override
Definition: bzip2_compression.hpp:406
bz_stream m_bzstream
Definition: bzip2_compression.hpp:350
std::size_t m_buffer_size
Definition: bzip2_compression.hpp:349
const char * m_buffer
Definition: bzip2_compression.hpp:348
Definition: bzip2_compression.hpp:168
BZFILE * m_bzfile
Definition: bzip2_compression.hpp:172
Bzip2Compressor(const Bzip2Compressor &)=delete
Bzip2Compressor(Bzip2Compressor &&)=delete
Bzip2Compressor & operator=(const Bzip2Compressor &)=delete
~Bzip2Compressor() noexcept override
Definition: bzip2_compression.hpp:195
std::size_t file_size() const override
Definition: bzip2_compression.hpp:237
Bzip2Compressor(const int fd, const fsync sync)
Definition: bzip2_compression.hpp:176
Bzip2Compressor & operator=(Bzip2Compressor &&)=delete
std::size_t m_file_size
Definition: bzip2_compression.hpp:170
void close() override
Definition: bzip2_compression.hpp:216
void write(const std::string &data) override
Definition: bzip2_compression.hpp:203
detail::file_wrapper m_file
Definition: bzip2_compression.hpp:171
Definition: bzip2_compression.hpp:243
~Bzip2Decompressor() noexcept override
Definition: bzip2_compression.hpp:269
BZFILE * m_bzfile
Definition: bzip2_compression.hpp:246
Bzip2Decompressor & operator=(const Bzip2Decompressor &)=delete
detail::file_wrapper m_file
Definition: bzip2_compression.hpp:245
Bzip2Decompressor & operator=(Bzip2Decompressor &&)=delete
bool m_stream_end
Definition: bzip2_compression.hpp:247
Bzip2Decompressor(Bzip2Decompressor &&)=delete
void close() override
Definition: bzip2_compression.hpp:326
Bzip2Decompressor(const int fd)
Definition: bzip2_compression.hpp:251
Bzip2Decompressor(const Bzip2Decompressor &)=delete
std::string read() override
Definition: bzip2_compression.hpp:277
bool register_compression(osmium::io::file_compression compression, const create_compressor_type &create_compressor, const create_decompressor_type_fd &create_decompressor_fd, const create_decompressor_type_buffer &create_decompressor_buffer)
Definition: compression.hpp:196
static CompressionFactory & instance()
Definition: compression.hpp:191
Definition: compression.hpp:57
bool do_fsync() const noexcept
Definition: compression.hpp:63
Definition: compression.hpp:91
@ input_buffer_size
Definition: compression.hpp:100
void set_offset(const std::size_t offset) noexcept
Definition: compression.hpp:125
bool want_buffered_pages_removed() const noexcept
Definition: compression.hpp:131
Definition: attr.hpp:342
fsync
Definition: writer_options.hpp:51
Namespace for everything in the Osmium library.
Definition: assembler.hpp:53
Definition: bzip2_compression.hpp:71
bzip2_error(const std::string &what, const int error_code)
Definition: bzip2_compression.hpp:76
int bzip2_error_code
Definition: bzip2_compression.hpp:73
int system_errno
Definition: bzip2_compression.hpp:74
Definition: error.hpp:46