Detecting and fixing Memory Issues

daan-mooij-674206-unsplash

There are two main tools I like to use for any memory related stuff in my c++ code. Following is an example code I had written as my implementation of a shared pointer. I think my code works as expected but not sure how much memory I am leaking if any.

#include <iostream>

template <typename T>
class MySharedPtr{
  public:
    MySharedPtr(T val){
      resource_ptr = new T(val);
      c_ptr = new int(1);
    };

  MySharedPtr<T>(const MySharedPtr<T> & ptr){
    std::cout << "copy constructor \n";
    resource_ptr = ptr.resource_ptr;
    (*ptr.c_ptr)++;
    this->c_ptr = ptr.c_ptr;
  }

  MySharedPtr<T>& operator=(const MySharedPtr<T> & ptr){
    std::cout << "copy assignment operator\n";
    if(this != &ptr){
      (*ptr.c_ptr)++;
      this->c_ptr = ptr.c_ptr;
      this->resource_ptr = ptr.resource_ptr;
     }
    return *this;
  }

  // delete move constructors...
  MySharedPtr(MySharedPtr && ptr) = delete;
  MySharedPtr & operator=(MySharedPtr && ptr) = delete;

  MySharedPtr & operator *(){
    return resource_ptr;
  }

  MySharedPtr* operator ->(){
    return resource_ptr;
  }

  virtual ~MySharedPtr(){
    std::cout << *c_ptr << '\n';
    if(*c_ptr > 0){
      (*c_ptr)--;
    }else{
      delete c_ptr;
      delete resource_ptr;
    }
  }

  int use_count(){
    return *c_ptr;
  }

  private:
    int * c_ptr;
  T * resource_ptr;
};

int main(){
  int x;
    MySharedPtr<decltype(x)> sp(1);
  {
    MySharedPtr<decltype(x)> sp1(2);
    sp1 = sp;
    std::cout << "use count is " << sp.use_count() << '\n';
  }
  
  sp = sp;
  std::cout << "use count is " << sp.use_count() << '\n';
  return 0;
}

Google Sanitizer

The command:

/usr/local/Cellar/gcc/8.2.0/bin/g++-8 -std=c++17 -g -o2 -fsanitize=address -fno-omit-frame-pointer shared_ptr.cpp -o shared_ptr && ASAN_OPTIONS=detect_leaks=1 ./shared_ptr

The output:

copy assignment operator
use count is 2
2
copy assignment operator
use count is 1
1

=================================================================
==25533==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 4 byte(s) in 1 object(s) allocated from:
    #0 0x526b18 in operator new(unsigned long) (/home/aarna/devel/practice/smart_ptrs+0x526b18)
    #1 0x52b50c in MySharedPtr<int>::MySharedPtr(int) /home/aarna/devel/practice/smart_ptrs.cpp:9:19
    #2 0x52b112 in main /home/aarna/devel/practice/smart_ptrs.cpp:62:28
    #3 0x7f6507f483d4 in __libc_start_main /usr/src/debug/glibc-2.17-c758a686/csu/../csu/libc-start.c:274

Direct leak of 4 byte(s) in 1 object(s) allocated from:
    #0 0x526b18 in operator new(unsigned long) (/home/aarna/devel/practice/smart_ptrs+0x526b18)
    #1 0x52b47b in MySharedPtr<int>::MySharedPtr(int) /home/aarna/devel/practice/smart_ptrs.cpp:8:26
    #2 0x52b134 in main /home/aarna/devel/practice/smart_ptrs.cpp:64:32
    #3 0x7f6507f483d4 in __libc_start_main /usr/src/debug/glibc-2.17-c758a686/csu/../csu/libc-start.c:274

Direct leak of 4 byte(s) in 1 object(s) allocated from:
    #0 0x526b18 in operator new(unsigned long) (/home/aarna/devel/practice/smart_ptrs+0x526b18)
    #1 0x52b50c in MySharedPtr<int>::MySharedPtr(int) /home/aarna/devel/practice/smart_ptrs.cpp:9:19
    #2 0x52b134 in main /home/aarna/devel/practice/smart_ptrs.cpp:64:32
    #3 0x7f6507f483d4 in __libc_start_main /usr/src/debug/glibc-2.17-c758a686/csu/../csu/libc-start.c:274

Direct leak of 4 byte(s) in 1 object(s) allocated from:
    #0 0x526b18 in operator new(unsigned long) (/home/aarna/devel/practice/smart_ptrs+0x526b18)
    #1 0x52b47b in MySharedPtr<int>::MySharedPtr(int) /home/aarna/devel/practice/smart_ptrs.cpp:8:26
    #2 0x52b112 in main /home/aarna/devel/practice/smart_ptrs.cpp:62:28
    #3 0x7f6507f483d4 in __libc_start_main /usr/src/debug/glibc-2.17-c758a686/csu/../csu/libc-start.c:274

SUMMARY: AddressSanitizer: 16 byte(s) leaked in 4 allocation(s).

The explanation: The address sanitizer has precisely pointed out the line numbers. It is very useful in coming up with a fix.

Valgrind

Command:

valgrind --tool=memcheck  --leak-check=full --show-leak-kinds=all --track-origins=yes  ./smart_ptrs

The output:

[aarna@localhost practice]$ valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all --track-origins=yes ./smart_ptrs
==25564== Memcheck, a memory error detector
==25564== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==25564== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==25564== Command: ./smart_ptrs
==25564==
==25564==Shadow memory range interleaves with an existing memory mapping. ASan cannot proceed correctly. ABORTING.
==25564==ASan shadow was supposed to be located in the [0x00007fff7000-0x10007fff7fff] range.
==25564==Process memory map follows:
0x000000400000-0x00000055d000 /home/aarna/devel/practice/smart_ptrs
0x00000075c000-0x00000075d000 /home/aarna/devel/practice/smart_ptrs
0x00000075d000-0x000000760000 /home/aarna/devel/practice/smart_ptrs
0x000000760000-0x000001447000
0x000004000000-0x000004022000 /usr/lib64/ld-2.17.so
0x000004022000-0x000004036000
0x00000403f000-0x000004055000
0x000004221000-0x000004222000 /usr/lib64/ld-2.17.so
0x000004222000-0x000004223000 /usr/lib64/ld-2.17.so
0x000004223000-0x000004224000
0x000004224000-0x000004225000
0x000004a24000-0x000004a25000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_core-amd64-linux.so
0x000004a25000-0x000004c24000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_core-amd64-linux.so
0x000004c24000-0x000004c25000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_core-amd64-linux.so
0x000004c25000-0x000004c26000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_core-amd64-linux.so
0x000004c26000-0x000004c34000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so
0x000004c34000-0x000004e34000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so
0x000004e34000-0x000004e35000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so
0x000004e35000-0x000004e36000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so
0x000004e36000-0x000004f1f000 /usr/lib64/libstdc++.so.6.0.19
0x000004f1f000-0x00000511e000 /usr/lib64/libstdc++.so.6.0.19
0x00000511e000-0x000005126000 /usr/lib64/libstdc++.so.6.0.19
0x000005126000-0x000005128000 /usr/lib64/libstdc++.so.6.0.19
0x000005128000-0x00000513d000
0x00000513d000-0x00000523e000 /usr/lib64/libm-2.17.so
0x00000523e000-0x00000543d000 /usr/lib64/libm-2.17.so
0x00000543d000-0x00000543e000 /usr/lib64/libm-2.17.so
0x00000543e000-0x00000543f000 /usr/lib64/libm-2.17.so
0x00000543f000-0x000005456000 /usr/lib64/libpthread-2.17.so
0x000005456000-0x000005655000 /usr/lib64/libpthread-2.17.so
0x000005655000-0x000005656000 /usr/lib64/libpthread-2.17.so
0x000005656000-0x000005657000 /usr/lib64/libpthread-2.17.so
0x000005657000-0x00000565b000
0x00000565b000-0x000005662000 /usr/lib64/librt-2.17.so
0x000005662000-0x000005861000 /usr/lib64/librt-2.17.so
0x000005861000-0x000005862000 /usr/lib64/librt-2.17.so
0x000005862000-0x000005863000 /usr/lib64/librt-2.17.so
0x000005863000-0x000005865000 /usr/lib64/libdl-2.17.so
0x000005865000-0x000005a65000 /usr/lib64/libdl-2.17.so
0x000005a65000-0x000005a66000 /usr/lib64/libdl-2.17.so
0x000005a66000-0x000005a67000 /usr/lib64/libdl-2.17.so
0x000005a67000-0x000005a7c000 /usr/lib64/libgcc_s-4.8.5-20150702.so.1
0x000005a7c000-0x000005c7b000 /usr/lib64/libgcc_s-4.8.5-20150702.so.1
0x000005c7b000-0x000005c7c000 /usr/lib64/libgcc_s-4.8.5-20150702.so.1
0x000005c7c000-0x000005c7d000 /usr/lib64/libgcc_s-4.8.5-20150702.so.1
0x000005c7d000-0x000005e40000 /usr/lib64/libc-2.17.so
0x000005e40000-0x00000603f000 /usr/lib64/libc-2.17.so
0x00000603f000-0x000006043000 /usr/lib64/libc-2.17.so
0x000006043000-0x000006045000 /usr/lib64/libc-2.17.so
0x000006045000-0x00000639c000
0x00000639c000-0x00000679c000
0x000058000000-0x00005823b000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/memcheck-amd64-linux
0x00005843b000-0x00005843e000 /opt/rh/devtoolset-7/root/usr/lib64/valgrind/memcheck-amd64-linux
0x00005843e000-0x000059e40000
0x001002001000-0x001008b0a000
0x001008b8c000-0x001008bac000
0x001008bac000-0x001008bae000
0x001008bae000-0x001008cae000
0x001008cae000-0x001008cb0000
0x001008cb0000-0x001008cb1000 /tmp/vgdb-pipe-shared-mem-vgdb-25564-by-aarna-on-localhost.localdomain
0x001008cb1000-0x00100b0fd000
0x00100b1fd000-0x00100b3fd000
0x00100b4fd000-0x00100b5fd000
0x00100b7f2000-0x00100ba1b000
0x00100bbdb000-0x00100bcdb000
0x001ffeffd000-0x001fff001000
0x7ffd1941d000-0x7ffd1943e000 [stack]
0xffffffffff600000-0xffffffffff601000 [vsyscall]
==25564==End of process memory map.
==25564==
==25564== HEAP SUMMARY:
==25564== in use at exit: 32 bytes in 1 blocks
==25564== total heap usage: 1 allocs, 0 frees, 32 bytes allocated
==25564==
==25564== 32 bytes in 1 blocks are still reachable in loss record 1 of 1
==25564== at 0x4C2B955: calloc (vg_replace_malloc.c:711)
==25564== by 0x586454F: _dlerror_run (dlerror.c:141)
==25564== by 0x5864057: dlsym (dlsym.c:70)
==25564== by 0x50233B: __interception::GetRealFunctionAddress(char const*, unsigned long*, unsigned long, unsigned long) (in /home/aarna/devel/practice/smart_ptrs)
==25564== by 0x4DD462: __asan::InitializeAsanInterceptors() (in /home/aarna/devel/practice/smart_ptrs)
==25564== by 0x41A49F: __asan::AsanInitInternal() [clone .part.1] (in /home/aarna/devel/practice/smart_ptrs)
==25564== by 0x400FCA2: _dl_init (dl-init.c:116)
==25564== by 0x4001029: ??? (in /usr/lib64/ld-2.17.so)
==25564==
==25564== LEAK SUMMARY:
==25564== definitely lost: 0 bytes in 0 blocks
==25564== indirectly lost: 0 bytes in 0 blocks
==25564== possibly lost: 0 bytes in 0 blocks
==25564== still reachable: 32 bytes in 1 blocks
==25564== suppressed: 0 bytes in 0 blocks
==25564==
==25564== For counts of detected and suppressed errors, rerun with: -v
==25564== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

The explanation:

The output by valgrind shows entire picture of virtual memory and how source files and code is laid out in the memory. It detects the loss of 32 bytes of heap memory still in use at the exit.

The fix:

The output shown by the google address sanitizer was useful at micro-level. It precisely pointed the line number and I was able to apply my first fix to the memory leak in virtual destructor as follows:

virtual ~MySharedPtr(){
  if(*c_ptr > 0){
    (*c_ptr)--;
  }
  
  if(*c_ptr == 0){
    if(resource_ptr != nullptr){
      delete resource_ptr;
      resource_ptr = nullptr;
    }
    delete c_ptr;
    c_ptr = nullptr;
  }
}

I still have following leaks:

=================================================================
==9130==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 4 byte(s) in 1 object(s) allocated from:
#0 0x1026a53d0 in wrap__Znwm (libasan.5.dylib:x86_64+0x6e3d0)
#1 0x102324526 in MySharedPtr<int>::MySharedPtr(int) shared_ptr.cpp:9
#2 0x10232428a in main shared_ptr.cpp:68
#3 0x7fff7661b014 in start (libdyld.dylib:x86_64+0x1014)

Direct leak of 4 byte(s) in 1 object(s) allocated from:
#0 0x1026a53d0 in wrap__Znwm (libasan.5.dylib:x86_64+0x6e3d0)
#1 0x1023244a4 in MySharedPtr<int>::MySharedPtr(int) shared_ptr.cpp:8
#2 0x10232428a in main shared_ptr.cpp:68
#3 0x7fff7661b014 in start (libdyld.dylib:x86_64+0x1014)

SUMMARY: AddressSanitizer: 8 byte(s) leaked in 2 allocation(s).

Going through the code I fixed my copy assignment operator by deleting the resource pointer and the counter pointer as follows:

MySharedPtr<T>& operator=(const MySharedPtr<T> & ptr){
  std::cout << "copy assignment operator\n";
  if(this != &ptr){
    delete resource_ptr;  // deleting fix 1
    delete c_ptr;         // deleting fix 2
    (*ptr.c_ptr)++;
    this->c_ptr = ptr.c_ptr;
    this->resource_ptr = ptr.resource_ptr;
   }
   return *this;
}

And my code generated error free output:

copy assignment operator
use count is 2
copy assignment operator
use count is 1

Template Metaprogramming Patterns

martin-adams-522797-unsplash

SFINAE – Substitution Failure is Not An Error

Template meta programming is fun. Lot of simple algorithms can be done at compile time and in very powerful way. The code uses a template meta-programming pattern called as SFINAE – Substitution failure is not an error.

Following are three examples for common problems like Fibonacci, Factorial and GCD calculations using SFINAE.

#include <iostream>

// find fibonacci using template meta-programming
template <long long N>
struct fibonacci{
  static constexpr long long value = fibonacci<N-1>::value + fibonacci<N-2>::value;
};

template<>
struct fibonacci<1>{
  static constexpr long long value = 1;
};

template<>
struct fibonacci<0>{
  static constexpr long long value = 0;
};

// end code for fibonacci
// -------------------------------------------------------------------------------------------
// find gcd using template meta programming
template<unsigned M, unsigned N>
struct gcd{
  static constexpr auto value = gcd<N,M%N>::value;
};

template<unsigned M>
struct gcd<M,0>{
  static_assert(M!=0);
  static constexpr auto value = M;
};
// -------------------------------------------------------------------------------------------
// factorial using template meta-programming
template<long long N>
struct factorial{
  static constexpr long long value = N*(factorial<N-1>::value);
};

template<>
struct factorial<0>{
  static constexpr long long value = 1;
};

int main(){
  std::cout << gcd<15,5>::value << '\n'; // 5
  std::cout << fibonacci<80>::value << '\n'; // 23416728348467685
  std::cout << factorial<20>::value << '\n'; // 2432902008176640000
}

More information about SINAF

 

CRTP – Curiously Recurring Template Pattern

The CRTP pattern was discovered  by an accident. There is another name for this pattern,  “Static Polymorphism”.The base class is generally templatized class and derived class is subclassed from base class with type as templatized class. The reason this is called as CRTP is because a method from derived class is called from base class statically.  The definition is rather complicated but the code should clear it.

#include <iostream>

struct Exchange{
  std::string name;
};

template <typename T>
struct Order{
  int transact(Exchange * exchange, size_t quantity){
    return static_cast<T *>(this)->transact(quantity);
  }
};

struct BuyOrder: Order<BuyOrder>{
  int transact(Exchange * exchange, size_t quantity){
    buy();
    return quantity;
  }

  void buy(){
    // code for buy goes here..
  }
};

struct SellOrder: Order<SellOrder>{
  int transact(Exchange * exchange, size_t quantity){
    sell();
    return quantity;
  }

  void sell(){
    // code for sell goes here..
  }
};

int main(){
  Exchange e;
  BuyOrder bo;
  bo.transact(e, 100);
  SellOrder so;
  so.transact(e, 100);
}

 

Modern c++ coding Thread pool

First attempt is to code the thread pool is using simple conditional variables and mutex. Once the multi-producer, multi-consumer thread pool is working as expected, its easy to convert into lock free buffer.

The ​​Threadpool class is a struct with following member variables.

  1. struct Work with following member variables
    1. _work to store the lambda/function pointer
    2. _args_ary to store the array
    3. is_processed boolean flag.
  2. std::array of a fixed size to hold the work.
  3. static const variable to indicate the max_size of work array.
  4. head_idx and tail_idx to guide the next index.
  5. a condition variable cv
  6. mutex m

There are 2 main methods for enqueuing and popping the items out and working on it. Since its a circular queue, next index calculated by adding one to current index and taking remainder from max_size. i.e.

next_idx = (current_index + 1 ) % max_size

Here is an example of SPSC – single producer single consumer locked queue. The queue is bounded and blocking. Means, if either the producer or the consumer is slow, it is going to block using the condition variable and mutex.

#include <iostream>
#include <thread>
#include <array>
#include <functional>
#include <vector>
#include <random>
#include <chrono>

using func_type = std::function<int(int,int)>;
using args_ary = std::array<int,2>;

struct ThreadPool{
  // Enqueue function to get the function and arguments to the function.
  // consumer can invoke function with args passed on to it. 
  bool enqueue(func_type fn, args_ary args){
    size_t next_idx = (head_idx + 1) % max_size;

    std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk,[&](){
      return work[next_idx].processed();
    });

    Work w{fn, args};
    work[next_idx] = w;
    head_idx = next_idx;
    cv.notify_all();
    return true;
  }

  // process or pop function to pop the latest record and process it.
  bool process(){
    size_t next_idx = (tail_idx + 1) % max_size;
    std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk,[&](){
      return !work[next_idx].processed();
    });

    Work w = work[next_idx];
    work[next_idx].set_processed();

    if(!w.invalid()){
      int output = w.process();
      std::cout << output << '\n';
    }
    tail_idx = next_idx;
    cv.notify_all();
    return true;
}

private:
  struct Work{
    private:
      func_type _work;
      args_ary _args;
      bool is_processed = false;

    public:
      Work():is_processed(true){};
      Work(func_type work, args_ary args):_work{work},_args{args},is_processed(false){};
      bool processed(){
        return is_processed;
      }
      int process(){
        return _work(_args[0], _args[1]);
      }
      bool invalid(){
        return _work == nullptr;
      }
      void set_processed(){
        is_processed = true;
      }
   };

   static const size_t max_size = 1000;
   std::array<Work,max_size> work;
   size_t head_idx = 0, tail_idx = 0;
   std::condition_variable cv;
  std::mutex mtx;
};

int random_num(){
  std::mt19937 rng;
  rng.seed(std::random_device()());
  std::uniform_int_distribution<std::mt19937::result_type> dist(1,10000); // distribution in range [1,10000]
  return dist(rng);
}

int main(){
  ThreadPool tp;

  std::thread producer1([&](){
    int count = 0;
    while(true){
      std::function<int(int,int)> fn;
      if (count % 3 == 0){
        fn = [](int a, int b){ std::cout << "Adding \t\t" << a << '\t'<< b << '\t'; return a+b;};
      }else if(count % 3 == 1){
        fn = [](int a, int b){ std::cout << "Subtracting \t" << a << '\t'<< b << '\t'; return a-b;};
      }else{
        fn = [](int a, int b){std::cout << "Multiplying \t" << a << '\t'<< b << '\t'; return a*b;};
      }
      tp.enqueue(fn, args_ary{random_num(),random_num()});
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
      ++count;
     }
  });

  std::thread consumer1([&](){
    while(true){
      tp.process();
    }
  });

  if(producer1.joinable()){
    producer1.join();
  }

  if(consumer1.joinable()){
    consumer1.join();
  }
  return 0;
}

Here is a complete working example for MPMC queue.

#include <iostream>
#include <thread>
#include <array>
#include <functional>
#include <vector>
#include <random>
#include <chrono>

using func_type = std::function<int(int,int)>;
using args_ary = std::array<int,2>;

struct ThreadPool{
  ThreadPool(){
    for(int i = 0; i < max_size; ++i){
      work[i] = Work();
    }
  }

  bool enqueue(func_type fn, args_ary args){
    size_t next_idx = (head_idx + 1) % max_size;
    while(work[next_idx].processed()){
      std::unique_lock<std::mutex> lk(mtx);
      cv.wait(lk,[&](){
        return work[next_idx].processed();
      });

      Work w = work[next_idx];
      if(w.invalid() || w.processed()){
        work[next_idx].set_work(fn);
        work[next_idx].set_args(args);
        work[next_idx].reset();
        cv.notify_all();
        break;
      }
   }
   head_idx = next_idx;
   return true;
}

  bool process(int remainder, int total_threads){
    size_t next_idx = (tail_idx + 1) % max_size;

    std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk,[&](){
      return work[next_idx].unprocessed();
    });

    Work w = work[next_idx];
    work[next_idx].set_processed();

    if(!w.invalid()){
      int output = w.process();
      std::cout << output << '\n';
    }
    tail_idx = next_idx;
    cv.notify_all();
    return true;
  }

  private:
    struct Work{
    private:
      func_type _work;
      args_ary _args;
      bool is_processed = false;

    public:
      Work():is_processed(true){}; 
      int process(){
        return _work(_args[0], _args[1]);
      }
    
    bool invalid(){
      return _work == nullptr;
    }
    
    void set_processed(){
      is_processed = true;
    }

    void set_work(func_type work){
      _work = work;
    }

    void set_args(args_ary args){
      _args = args;
    }
    void reset(){
      is_processed = false;
    }

    bool unprocessed(){
      return is_processed == false;
    }

    bool processed(){
      return is_processed == true;
    }
};

  static const size_t max_size = 1000;
  std::array<Work,max_size> work;
  size_t head_idx = 0, tail_idx = 0;
  std::condition_variable cv;
  std::mutex mtx;
};

int random_num(){
  std::mt19937 rng;
  rng.seed(std::random_device()());
  std::uniform_int_distribution<std::mt19937::result_type> dist(1,10000); // distribution in range [1,10000]
  return dist(rng);
}

int main(){
  ThreadPool tp;
  auto adder = [](int a, int b){ std::cout << "Adding \t\t" << a << '\t'<< b << '\t'; return a+b;};
  auto subtractor = [](int a, int b){ std::cout << "Subtracting \t" << a << '\t'<< b << '\t'; return a-b;};
  auto multiplicator = [](int a, int b){std::cout << "Multiplying \t" << a << '\t'<< b << '\t'; return a*b;};

  std::thread producer1([&](){
    while(true){
      tp.enqueue(adder, args_ary{random_num(),random_num()});
      //std::this_thread::sleep_for(std::chrono::milliseconds(10));
    } 
  });

  std::thread producer2([&](){
    while(true){
      tp.enqueue(subtractor, args_ary{random_num(),random_num()}); 
      //std::this_thread::sleep_for(std::chrono::milliseconds(10));
  } 
});

std::thread producer3([&](){
  while(true){
    tp.enqueue(multiplicator, args_ary{random_num(),random_num()}); 
      //std::this_thread::sleep_for(std::chrono::milliseconds(10));
  } 
});

std::thread consumer1([&](){
  while(true){
    tp.process(0, 3);
  }
});

std::thread consumer2([&](){
  while(true){
    tp.process(1, 3);
  }
});

std::thread consumer3([&](){
  while(true){
    tp.process(2, 3);
  }
});


  if(producer1.joinable()){
    producer1.join();
  }

  if(producer2.joinable()){
    producer2.join();
  }

  if(producer3.joinable()){
    producer3.join();
  }

  if(consumer1.joinable()){
    consumer1.join();
  }

  if(consumer2.joinable()){
    consumer2.join();
  }
  if(consumer3.joinable()){
    consumer3.join();
  }

  return 0;
}

Output

Adding      9316 8539 17855
Subtracting 3529 1274 2255
Multiplying 7821 2404 18801684
Adding      9455 8300 17755
Subtracting 9882 5696 4186
Multiplying 202 9795 1978590
Adding      5794 7979 13773
Subtracting 4331 576 3755
Multiplying 963 4548 4379724
Adding      7238 9714 16952
Subtracting 9868 5185 4683
Multiplying 9139 4941 45155799
Adding      6842 7031 13873
Subtracting 3043 2312 731

Lockfree code

A Single producer, single consumer Lockfree queue can be written as follows

#include <iostream>
#include <thread>
#include <array>
#include <functional>
#include <vector>
#include <random>
#include <chrono>

using func_type = std::function<int(int,int)>;
using args_ary = std::array<int,2>;

struct ThreadPool{
  // Constructor
  ThreadPool(){
    for(int i = 0; i < max_size; ++i){
      work[i] = Work();
    }
  }

  bool enqueue(func_type fn, args_ary args){
    size_t current_head_idx = head_idx.load(std::memory_order_acquire);
    std::atomic<size_t> next_idx = (current_head_idx + 1) % max_size;

    Work w = work[next_idx];
    if(w.invalid() || w.processed()){
      work[next_idx].set_work(fn);
      work[next_idx].set_args(args);
      work[next_idx].reset();
      head_idx.store(next_idx, std::memory_order_release);
    }
    return true;
  }

  bool process(){
    size_t current_tail_idx = tail_idx.load(std::memory_order_acquire);
    std::atomic<size_t> next_idx = (current_tail_idx + 1) % max_size;

    if(!work[next_idx].processed()){
      int output = work[next_idx].process();
      std::cout << output << '\n';
      work[next_idx].set_processed();
      tail_idx.store(next_idx);
     }
    return true;
}

  private:
    struct Work{
      private:
        func_type _work;
        args_ary _args;
        bool is_processed = false;

      public:
        Work():is_processed(true){};
        Work(func_type work, args_ary args):_work{work},_args{args},is_processed(false){};
        bool processed(){
          return is_processed == true;
        }

       int process(){
         return _work(_args[0], _args[1]);
       }

       bool invalid(){
         return _work == nullptr;
       }

       void set_processed(){ 
         is_processed = true;
       }

       void set_work(func_type work){
         _work = work;
       }

       void set_args(args_ary args){
         _args = args;
       }
       
       void reset(){
         is_processed = false;
       }
    };

    static const size_t max_size = 1000;
    std::array<Work,max_size> work;
    std::atomic<size_t> head_idx{0}, tail_idx{0};
};

int random_num(){
  std::mt19937 rng;
  rng.seed(std::random_device()());
  std::uniform_int_distribution<std::mt19937::result_type> dist(1,10000); // distribution in range [1,10000]
  return dist(rng);
}

int main(){
  ThreadPool tp;

  std::thread producer1([&](){
    int count = 0;
    while(true){
      std::function<int(int,int)> fn;
      if (count % 3 == 0){
        fn = [](int a, int b){ 
               std::cout << "Adding \t\t" << a << '\t'<< b << '\t'; 
               return a+b;
              };
      }else if(count % 3 == 1){
        fn = [](int a, int b){ 
               std::cout << "Subtracting \t" << a << '\t'<< b << '\t'; 
               return a-b;};
      }else{
        fn = [](int a, int b){
              std::cout << "Multiplying \t" << a << '\t'<< b << '\t'; 
              return a*b;};
        }
       tp.enqueue(fn, args_ary{random_num(),random_num()});
       ++count;
    }
});

  std::thread consumer1([&](){
    while(true){
     tp.process();
    }
  });

  if(producer1.joinable()){
    producer1.join();
  }

  if(consumer1.joinable()){
    consumer1.join();
  }
  return 0;
}

Sizes of data types in c++

centimeters-close-up-depth-of-field-162500

This post explains sizes of different data types, structures and classes under various circumstances.

#include <iostream>
#include <atomic>
#include <string>
#include <vector>
#include <set>
#include <queue>
#include <forward_list>
#include <memory>
// An empty class without any data members
class EmptyClass{};

// class with member variable int
class X{
  int a;
};

// class with member variable int and double
class Y{
  int a;
  double b;
};

// class with static variable int
class StaticMember{
  static int a;
};

// packed structure
class __attribute__((packed)) YPacked{
int a;
double b;
};

// packed large structure
class __attribute__((packed)) LargeStruct {
int a;
double b;
float c;
char d;
double l;
double m;
int n;
int o;
double p;
double q;
double t;
double u;
double v;
double w;
char s;
double x;
float y;
int z;
};

// class with a single virtual function
class VFunClass{
  public:
    virtual void fun(){std::cout << "class with virtual function\n";};
};

// class with a virtual destructor.
class VDesttructorClass{
  public:
    virtual ~VDesttructorClass(){
      std::cout << "in the virtual destructor\n";
    }
};


// Multi level class hierarchy
class Base{};

class Derived: public Base{

};

class Derived1: public Base{
};

class Derived2: public Base{
};

class MostDerived: public Derived, public Derived1, public Derived2{};
class VBase{};

class VDerieved: public virtual VBase{

};

class VDerieved1: public virtual VBase{
};


class VMostDerieved: public VDerieved, public VDerieved1{

};
int main(){
  std::string s = "test string";
  std::cout << "sizeof(s) "<< sizeof(s) << '\n'; // 24
  std::cout << "s.size() " << s.size() << '\n'; // 11


  std::vector<int> v{1,2,3,4,5};
  std::cout << "sizeof(v) "<< sizeof(v) << '\n'; // 24
  std::cout << "v.size() " << v.size() << '\n'; // 5

  std::set<int> set{1,2,3,4,5};
  std::cout << "sizeof(set) "<< sizeof(set) << '\n'; // 24
  std::cout << "set.size() " << set.size() << '\n'; // 5

  std::queue<int> queue;
  for(int i = 0; i < 5; ++i) queue.push(i);
  std::cout << "sizeof(queue) "<< sizeof(queue) << '\n'; // 48
  std::cout << "queue.size() " << queue.size() << '\n'; // 5

  std::forward_list<int> fl;
  auto it = fl.before_begin();
  it = fl.emplace_after ( it, 100 );
  it = fl.emplace_after ( it, 200 );
  it = fl.emplace_after ( it, 300 );
  std::cout << "sizeof(fl) "<< sizeof(fl) << '\n'; // 8

// Size of X is 4 because there is a single data member in the class/struct
  std::cout << "sizeof(X) "<< sizeof(X) << '\n'; // 4

// Size of Y should be 12 i.e. addition of int (4) + double (8)= 12
// but its size if 16 because the compiler aligns the class/struct
// by multiples of its max member.
  std::cout << "sizeof(Y) "<< sizeof(Y) << '\n'; // 16

// Size of a packed structure = sum of sizes of member variables
  std::cout << "sizeof(YPacked) "<< sizeof(YPacked) << '\n'; // 12

  std::unique_ptr<int> uniq_ptr = std::make_unique<int>(10);
  std::cout << "sizeof(uniq_ptr) " << sizeof(uniq_ptr) << '\n'; // 8

  std::shared_ptr<int> sh_ptr = std::make_shared<int>(10);
  std::cout << "sizeof(sh_ptr) " << sizeof(sh_ptr) << '\n'; // 16

  std::weak_ptr<int> w_ptr = std::make_shared<int>(10);
  std::cout << "sizeof(w_ptr) " << sizeof(w_ptr) << '\n'; // 16

  std::cout << "sizeof(EmptyClass) "<< sizeof(EmptyClass) << '\n'; // 1 
  std::cout << "sizeof(VFunClass) " << sizeof(VFunClass) << '\n'; // 8
  std::cout << "sizeof(VDesttructorClass) " << sizeof(VDesttructorClass) << '\n'; // 8

// Size of most Derived class in the hierarchy with 3 NON virtual 
// Base classes has 2 * 1 bype per class Hence the size if 3
  std::cout << "sizeof(MostDerived) " << sizeof(MostDerived) << '\n'; // 3

// Size of most Derived class in the hierarchy with 2 virtual Base classes
// has 2 * 8 bype (v_ptr). Hence the size if 16.
std::cout << "sizeof(VMostDerived) " << sizeof(VMostDerived) << '\n'; // 16

// Size of class with only static variable is 1 i.e.
// it does not include size of static in the size of class.
  std::cout << "sizeof(StaticMember) " << sizeof(StaticMember) << '\n'; // 1

  std::atomic<int> i = 10;
  std::cout << "sizeof(std::atomic<int>) " << sizeof(i) << '\n'; // 4

//size of packed atomic structure is 16
  std::atomic<double> d;
  std::cout << "sizeof(std::atomic<double>) " << sizeof(std::atomic<double>) << '\n'; // 16

//size of packed atomic structure is 16
  std::atomic<YPacked> yp;
  std::cout << "sizeof(std::atomic<YPacked>) " << sizeof(std::atomic<YPacked>) << '\n'; // 16
}

CRTP a.k.a. Static Polymorphism

The size of CRTP class is 1 compared to 8 in virtual function/class.

template <typename T>
class Logger{
  public:
    void log(){
      static_cast<T*>(this)->log();
    }
};

class FileLogger: public Logger<FileLogger>{
  public:
    void log(){
      std::cout << "In the class FileLogger\n";
    }
};

int main(){
  std::cout << sizeof(FileLogger) << '\n'; // Size is 1
  FileLogger fl;
  r43fl.log();
}

Sublime Build System

{
"cmd": "g++ -std=c++17 \"${file}\" -o \"${file_path}/${file_base_name}\" && \"${file_path}/${file_base_name}\"",
   "file_regex": "^(..[^:]*):([0-9]+):?([0-9]+)?:? (.*)$",
   "working_dir": "${file_path}",
   "selector": "source.c, source.c++",
   "shell": true,
   "variants":
   [
    
     {
       "name": "Run",
      "cmd": "g++ -std=c++17 \"${file}\" -o \"${file_path}/${file_base_name}\" && \"${file_path}/${file_base_name}\""
     }
   ]
  }

Vector back inserter example + algorithm

The problem is pretty simple: there is a list of time ranges for meetings and need to show the meetings as continuous block in calendar.

70940aec3ea953621db1b8faf619a5ca_2

As per modern c++ I have used std::back_inserter_iterator for vector instead of passing vector by value or by reference. This approach is much cleaner, modern and saves a lot of copying of data compared to old style.

// i/p: [(0, 1), (3, 5), (4, 8), (10, 12), (9, 10)]
// o/p: [(0, 1), (3, 8), (9, 12)]

#include <iostream>
#include <vector>
#include <algorithm>

struct Meeting; // define meeting

using it = std::vector<Meeting>::iterator;
// use an back inserter iterator to pass as function argument instead
// of passing vector by reference or creating vector on heap
using bi = std::back_insert_iterator<std::vector<Meeting>>;

struct Meeting{
  int start_time, end_time;
  // default constructor
  Meeting() = default;
 // parameterized constructor
  Meeting(int start_time, int end_time):start_time{start_time},end_time{end_date}{}
  
  // override < operator used for sorting.
  bool operator <(const Meeting &other) const{
    return start_date < other.start_date;
  }

static void merge(it start, it finish, bi merged_meeting_bi){
  int start_time = start->start_time;
  int end_time = start->end_time;
  while(start < finish){
    start++;
    if(start->start_time > end_time){
      merged_meeting_bi = {start_time, end_time}; // create meeting using parameterized constructor
      start_time = start->start_time; 
      end_time = start->end_time;
    }else{
      end_time = std::max(start->end_time, end_time);
     }
   }
  }
};

int main(){
  std::vector<Meeting> meetings{{0, 1}, {3, 5}, {4, 8}, {10, 12}, {9, 10}};
  std::vector<Meeting> merged_meetings;
  auto bi = std::back_insert_iterator<std::vector<Meeting>>(merged_meetings);
  auto start = meetings.begin();
  auto finish = meetings.end();
  // sort by first time
  std::sort(start, finish);
  
for(const auto &meeting : meetings){
std::cout << meeting.start_time << ' ' << meeting.end_time << '\n';
}
Meeting::merge(start, finish, bi);

for(const auto & meeting: merged_meetings){
std::cout << meeting.start_time << ' ' << meeting.end_time << '\n';
}
return 0;
}

Dutch National Flag – Algorithm + TDD

640px-Flag_of_the_Netherlands.svg

The algorithm is pretty simple and achievable in O(n) time where n is the number of elements. Instead of using indexes, I am using 3 set of iterators and added bunch of test cases.

#include <gtest/gtest.h>
#include <algorithm> // random_shuffle
#include <vector>
#include <numeric> // accumulate

using it = std::vector<int>::iterator; // using typedef

void dnf(it less, it equal, it greater, int pivot){
  while(equal <= greater){
    if(*equal < pivot){
      std::iter_swap(less, equal);
      less++;
      equal++;
    }else if(*equal == pivot){
      equal++;
    }else{
      std::iter_swap(equal, greater);
      greater--;
    }
  }
}

TEST(RandomTest, dnfTest){
  std::vector<int> v{2, 2, 0, 1, 1, 0, 2, 1, 0, 1, 0, 1, 0, 2, 2};
  int pivot = 1;
  for(int i = 0; i < 1000; i++){
    dnf(v.begin(), v.begin(), v.end()-1, 1);
    std::string output = std::accumulate(std::next(v.begin()), v.end(), 
                               std::to_string(*v.begin()),
                               [](std::string s, int i){return s+= std::to_string(i);});

    EXPECT_EQ(output, "000001111122222");
  }
}

TEST(RandomTestTwoNumbers, dnfTest){
  std::vector<int> v{0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1};
  int pivot = 1;
  for(int i = 0; i < 1000; i++){
    dnf(v.begin(), v.begin(), v.end()-1, 1);
    std::string output = std::accumulate(std::next(v.begin()), v.end(), 
                               std::to_string(*v.begin()),
                               [](std::string s, int i){return s+= std::to_string(i);});

    EXPECT_EQ(output, "000000001111111");
  }
}

TEST(RandomTestTwoNumbersMidPivot, dnfTest){
  std::vector<int> v{0, 0, 0, 0,0,9,9,9,9,9,9,9,9};
  int pivot = 1;
  for(int i = 0; i < 1000; i++){
    dnf(v.begin(), v.begin(), v.end()-1, 1);
    std::string output = std::accumulate(std::next(v.begin()), v.end(), 
                               std::to_string(*v.begin()),
                               [](std::string s, int i){return s+= std::to_string(i);});

    EXPECT_EQ(output, "0000099999999");
  }
}

TEST(RandomTestTwoNumbersHighPivot, dnfTest){
  std::vector<int> v{0, 0, 0, 0,0,9,9,9,9,9,9,9,9};
  int pivot = 10;
  for(int i = 0; i < 1000; i++){
    dnf(v.begin(), v.begin(), v.end()-1, 1);
    std::string output = std::accumulate(std::next(v.begin()), v.end(), 
                               std::to_string(*v.begin()),
                               [](std::string s, int i){return s+= std::to_string(i);});

    EXPECT_EQ(output, "0000099999999");
  }
}

int main(int argc, char**argv, char**envArg)
{
    testing::InitGoogleTest(&argc, argv);
    return(RUN_ALL_TESTS());
}