libbpg-0.9.6

2015-10-27 11:46:00 +01:00 · 2015-10-27 11:46:00 +01:00 · 35a8402710
commit 35a8402710
parent 3035b41edf
248 changed files with 232891 additions and 100 deletions
--- a/8
+++ b/8
@ -1,5 +1,11 @@
+version 0.9.6:
+
+- Faster encoding (x265 is the default encoder and is built into bpgenc).
+- Added monochrome support to x265.
+- Fixed metadata handling.
+
 version 0.9.5:
-	
+
 - Added animation support.
 - added bpgview utility.
 - bpgenc: fixed support of some JPEG parameter combinations
--- a/59
+++ b/59
@ -4,10 +4,10 @@
 #
 # Enable compilation of Javascript decoder with Emscripten
 #USE_EMCC=y
-# Enable x265 for the encoder (you must install it before)
-#USE_X265=y
+# Enable x265 for the encoder
+USE_X265=y
 # Enable the JCTVC code (best quality but slow) for the encoder
-USE_JCTVC=y
+#USE_JCTVC=y
 # Compile bpgview (SDL and SDL_image libraries needed)
 USE_BPGVIEW=y
 # Enable it to use bit depths > 12 (need more tests to validate encoder)
@ -23,8 +23,8 @@ prefix=/usr/local
 #################################

 ifdef CONFIG_WIN32
-#CROSS_PREFIX:=x86_64-w64-mingw32-
-CROSS_PREFIX=i686-w64-mingw32-
+CROSS_PREFIX:=x86_64-w64-mingw32-
+#CROSS_PREFIX=i686-w64-mingw32-
 EXE:=.exe
 else
 CROSS_PREFIX:=
@ -97,9 +97,41 @@ BPGENC_OBJS:=bpgenc.o
 BPGENC_LIBS:=

 ifdef USE_X265
-BPGENC_OBJS+=x265_glue.o
-BPGENC_LIBS+= -lx265
+
+X265_LIBS:=./x265.out/8bit/libx265.a ./x265.out/10bit/libx265.a ./x265.out/12bit/libx265.a
+BPGENC_OBJS+=x265_glue.o $(X265_LIBS)
+
 bpgenc.o: CFLAGS+=-DUSE_X265
+x265_glue.o: CFLAGS+=-I./x265/source -I./x265.out/8bit
+x265_glue.o: $(X265_LIBS)
+
+ifdef CONFIG_WIN32
+CMAKE_OPTS:=-DCMAKE_TOOLCHAIN_FILE=../../x265/build/msys/toolchain-x86_64-w64-mingw32.cmake
+else
+CMAKE_OPTS:=
+endif
+
+x265.out:
+	mkdir -p x265.out/8bit x265.out/10bit x265.out/12bit
+	cd x265.out/12bit && cmake ../../x265/source $(CMAKE_OPTS) -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
+	cd x265.out/10bit && cmake ../../x265/source $(CMAKE_OPTS) -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN10=ON
+	cd x265.out/8bit && cmake ../../x265/source $(CMAKE_OPTS) -DLINKED_10BIT=ON -DLINKED_12BIT=ON -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
+
+# use this target to manually rebuild x265
+x265_make: | x265.out
+	$(MAKE) -C x265.out/12bit
+	$(MAKE) -C x265.out/10bit
+	$(MAKE) -C x265.out/8bit
+
+x265_clean:
+	rm -rf x265.out
+
+$(X265_LIBS): x265_make
+
+else
+
+x265_clean:
+
 endif # USE_X265

 ifdef USE_JCTVC
@ -133,10 +165,9 @@ endif # USE_JCTVC

 ifdef CONFIG_WIN32

-LDFLAGS+=-static
-BPGDEC_LIBS:=-Wl,-dy -lpng -lz -Wl,-dn
-BPGENC_LIBS+=-Wl,-dy -lpng -ljpeg -lz -Wl,-dn
-BPGVIEW_LIBS:=-lmingw32 -lSDLmain -Wl,-dy -lSDL_image -lSDL -Wl,-dn -mwindows
+BPGDEC_LIBS:=-lpng -lz
+BPGENC_LIBS+=-lpng -ljpeg -lz
+BPGVIEW_LIBS:=-lmingw32 -lSDLmain -lSDL_image -lSDL -mwindows

 else

@ -171,10 +202,10 @@ bpgdec.js: $(LIBBPG_JS_OBJS) post.js
 	$(EMCC) $(EMLDFLAGS) -s TOTAL_MEMORY=33554432 -o $@ $(LIBBPG_JS_OBJS)

 bpgdec8.js: $(LIBBPG_JS8_OBJS) post.js
-	$(EMCC) $(EMLDFLAGS) -s TOTAL_MEMORY=16777216 -o $@ $(LIBBPG_JS8_OBJS)
+	$(EMCC) $(EMLDFLAGS) -s TOTAL_MEMORY=33554432 -o $@ $(LIBBPG_JS8_OBJS)

 bpgdec8a.js: $(LIBBPG_JS8A_OBJS) post.js
-	$(EMCC) $(EMLDFLAGS) -s TOTAL_MEMORY=16777216 -o $@ $(LIBBPG_JS8A_OBJS)
+	$(EMCC) $(EMLDFLAGS) -s TOTAL_MEMORY=33554432 -o $@ $(LIBBPG_JS8A_OBJS)

 size:
 	strip bpgdec
@ -187,7 +218,7 @@ install: bpgenc bpgdec
 CLEAN_DIRS=doc html libavcodec libavutil \
     jctvc jctvc/TLibEncoder jctvc/TLibVideoIO jctvc/TLibCommon jctvc/libmd5

-clean:
+clean: x265_clean
 	rm -f $(PROGS) *.o *.a *.d *~ $(addsuffix /*.o, $(CLEAN_DIRS)) \
          $(addsuffix /*.d, $(CLEAN_DIRS)) $(addsuffix /*~, $(CLEAN_DIRS)) \
          $(addsuffix /*.a, $(CLEAN_DIRS))
--- a/103
+++ b/103
@ -8,12 +8,6 @@ BPG Image library and utilities
  options should be OK for Linux). Type 'make' to compile and 'make
  install' to install the compiled binaries.

- x265 usage: for much increased compression speed (but lower
-  quality), you can compile and install x265 and then enable its use
-  in the Makefile. x265 supports by default only 8 bits per component
-  and does not support monochrome encoding yet (hence no alpha nor
-  grayscale images can be encoded with it).
-
 - bpgview: in order to compile it you need to install the SDL and
  SDL_image libraries.

@ -27,24 +21,57 @@ BPG Image library and utilities

 - The BPG file format is specified in doc/bpg_spec.txt.

-2) BPG encoder
+2) Compilation and Installation Notes
+-------------------------------------
+
+2.1) Linux
+----------
+
+  - Edit the Makefile to change the compile options (the default
+  compile options should be OK). Type 'make' to compile and 'make
+  install' to install the compiled binaries.
+ 
+  - Use 'make -j N' where N is the number of CPU cores to compile faster.
+
+  - The following packages must be installed: SDL-devel
+  SDL_image-devel yasm. It is recommended to use yasm version >= 1.3.0
+  to have a faster compilation.
+ 
+  - Only a 64 bit target is supported because x265 needs it for bit
+    depths > 8.
+
+2.2) Windows
+------------
+
+  - Only cross-compilation from Linux is supported.
+
+  - The following packages need to be installed: mingw64-gcc
+    mingw64-libpng mingw64-libjpeg-turbo mingw64-SDL mingw64-SDL_image
+    yasm. It is recommended to use yasm version >= 1.3.0 to have a
+    faster compilation.
+
+  - Only a 64 bit target is supported because x265 needs it for bit
+    depths > 8.
+
+3) BPG encoder
 --------------

 The BPG command line encoder is 'bpgenc'. It takes JPEG or PNG images
 as input.

- Speed: by default bpgenc uses the JCTVC encoder which has a high
-  quality but is slow. If you compiled with x265, you can have a much
-  faster encoding with the '-e x265' option. With x265 you can also
-  select the encoding speed with the '-m' option (1 = fast, but larger
-  image, 9 = slower but smaller image). Warning: x265 does not support
-  monochrome (and alpha) yet, so you must use the JCTVC encoder for
-  these cases.
+- Speed: by default bpgenc uses the x265 encoder. You can compile the much
+  slower but more efficient JCTVC encoder and select it with the '-e
+  jctvc' option. With x265 you can select the encoding speed with the
+  '-m' option (1 = fast, but larger image, 9 = slower but smaller
+  image).

 - Bit depth: the default bit depth is 8. You can increase it to 10
  ('-b 10' option) to slightly increase the compression ratio. For web
  publishing it is generally not a good idea because the Javascript
-  decoder uses more memory.
+  decoder uses more memory. The compiled x265 encoder supports bit
+  depths of 8, 10 and 12. The slower JCTVC encoder can be compiled to
+  support higher bit depths (up to 14) by enabling the Makefile
+  define: USE_JCTVC_HIGH_BIT_DEPTH.

 - Lossless compression is supported as a bonus thru the HEVC lossless
  capabilities. Use a PNG input in this case unless you know what you
@ -60,15 +87,8 @@ as input.
    - the JCTVC encoder gives smaller images than the x265 encoder
      with lossless compression.

- There is a difference of interpretation of the quantizer parameter
-  (-q option) between the x265 and JCTVC encoder. The default value is
-  optimized for the JCTVC encoder, not for x265. We will try to align
-  the x265 value to JCTVC in the future.
-
- By default, the JCTVC encoder is limited to a precision of 12
-  bits. You can enable high bit depths (up to 14) by enabling the
-  Makefile define: USE_JCTVC_HIGH_BIT_DEPTH. The encoder is sligthly
-  slower in this case.
+- There is a small difference of interpretation of the quantizer
+  parameter (-q option) between the x265 and JCTVC encoder.

 - Color space and chroma format:

@ -125,11 +145,11 @@ as input.
  with the '-keepmetadata' option. For JPEG input, EXIF, ICCP and XMP
  are copied. For PNG input, ICCP is copied.

- Objective comparisons: the JCTVC encoder is tuned for PSNR only, not
-  for SSIM, so you should use PSNR when making objective comparison
-  with other formats. x265 is tuned by default for SSIM.
+- Objective comparisons: x265 is tuned by default for SSIM. The JCTVC
+  encoder is tuned for PSNR only, not for SSIM, so you should use PSNR
+  when making objective comparison with other formats.

-3) BPG decoder
+4) BPG decoder
 --------------

 The BPG command line decoder is bpgdec. It outputs a PNG or PPM
@ -141,7 +161,7 @@ no decoded image is output).
 - The '-b' option selects the bit depth (8 or 16) of the PNG
  output. It is independent of the internal BPG bit depth.

-4) BPG viewer
+5) BPG viewer
 -------------

 The BPG image viewer uses the SDL library to display BPG images and
@ -149,7 +169,7 @@ other image formats supported by the SDL_image library. The available
 keys are displayed by launching bpgview without parameters. bpgview
 supports BPG animations.

-5) BPG decoding library
+6) BPG decoding library
 -----------------------

 BPG images can be decoded in any program with the libbpg
@ -161,7 +181,7 @@ provided as a static one.
 Currently there is no similar library for encoding so you should
 invoke the bpgenc utility.

-6) Javascript decoder
+7) Javascript decoder
 ---------------------

 The following Javascript decoders are available, sorted by increasing size:
@ -193,7 +213,7 @@ be avoided, as with animated GIFs.
 asm.js gives an interesting speed boost, so we hope that more browsers
 will support this Javascript subset.

-7) FFmpeg modifications
+8) FFmpeg modifications
 -----------------------

 - Completed support of chroma_format_idc = 0 (monochrome mode).
@ -217,16 +237,25 @@ will support this Javascript subset.
 - Stripped FFmpeg from all codecs except HEVC and the necessary
  support code.

-8) Licensing
------------
+9) x265 modifications
+---------------------
+
+- Support of monochrome format (some parts not used by BPG may be
+  missing).
+
+- Support of static build.
+
+10) Licensing
+-------------

 - libbpg and bpgdec are released under the LGPL license (the FFmpeg
  part is under the LGPL, the BPG specific part is released under the
  BSD license).

- bpgenc is released under the BSD license (it includes the JCTVC code
-  which is released under the BSD license. The BPG specific part is
-  released under the BSD license).
+- bpgenc is released under the GPL version 2 license. The BPG specific
+  code is released under the BSD license. The JCTVC code is released
+  under the BSD license. The x265 code is released under the GPL
+  version 2 license.

 - BPG relies on the HEVC compression technology which may be protected
  by patents in some countries. Most devices already include or will
--- a/2
+++ b/2
@ -1 +1 @@
-0.9.5
+0.9.6
--- a/bpgenc.c
+++ b/bpgenc.c
@ -1893,10 +1893,12 @@ static int build_modified_sps(uint8_t **pout_buf, int *pout_buf_len,
            int vui_hrd_parameters_present_flag, bitstream_restriction_flag;

            sar_present = get_bits(gb, 1);
-            sar_idx = get_bits(gb, 8);
-            if (sar_idx == 255) {
-                skip_bits(gb, 16); /* sar_num */ 
-                skip_bits(gb, 16); /* sar_den */ 
+            if (sar_present) {
+                sar_idx = get_bits(gb, 8);
+                if (sar_idx == 255) {
+                    skip_bits(gb, 16); /* sar_num */ 
+                    skip_bits(gb, 16); /* sar_den */ 
+                }
            }
            
            overscan_info_present_flag = get_bits(gb, 1);
@ -2159,38 +2161,38 @@ static int build_modified_hevc(uint8_t **pout_buf,
 }

 typedef enum {
-#if defined(USE_JCTVC)
-    HEVC_ENCODER_JCTVC,
-#endif
 #if defined(USE_X265)
    HEVC_ENCODER_X265,
 #endif
+#if defined(USE_JCTVC)
+    HEVC_ENCODER_JCTVC,
+#endif

    HEVC_ENCODER_COUNT,
 } HEVCEncoderEnum;

 static char *hevc_encoder_name[HEVC_ENCODER_COUNT] = {
-#if defined(USE_JCTVC)
-    "jctvc",
-#endif
 #if defined(USE_X265)
    "x265",
 #endif
+#if defined(USE_JCTVC)
+    "jctvc",
+#endif
 };

 static HEVCEncoder *hevc_encoder_tab[HEVC_ENCODER_COUNT] = {
-#if defined(USE_JCTVC)
-    &jctvc_encoder,
-#endif
 #if defined(USE_X265)
    &x265_hevc_encoder,
 #endif
+#if defined(USE_JCTVC)
+    &jctvc_encoder,
+#endif
 };

 #define IMAGE_HEADER_MAGIC 0x425047fb

 #define DEFAULT_OUTFILENAME "out.bpg"
-#define DEFAULT_QP 28
+#define DEFAULT_QP 29
 #define DEFAULT_BIT_DEPTH 8

 #ifdef RExt__HIGH_BIT_DEPTH_SUPPORT
--- a/doc/bpg_spec.txt
+++ b/doc/bpg_spec.txt
@ -79,9 +79,7 @@ heic_file() {
     
     picture_data_length                                         ue7(32)
     if (extension_present_flag)  
-        extension_data_length                                    ue7(32)
-    
-     if (extension_present_flag) {
+         extension_data_length                                   ue7(32)
         extension_data()
     }

--- a/html/bpgdec.js
+++ b/html/bpgdec.js
--- a/html/bpgdec8.js
+++ b/html/bpgdec8.js
--- a/html/bpgdec8a.js
+++ b/html/bpgdec8a.js
--- a/libbpg.c
+++ b/libbpg.c
@ -1786,6 +1786,7 @@ static int bpg_decode_header(BPGHeaderData *h,
                    md = av_malloc(sizeof(BPGExtensionData));
                    md->tag = tag;
                    md->buf_len = buf_len;
+                    md->next = NULL;
                    *plast_md = md;
                    plast_md = &md->next;
                    
--- a/x265/COPYING
+++ b/x265/COPYING
@ -0,0 +1,343 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year  name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+This program is also available under a commercial proprietary license.
+For more information, contact us at license @ x265.com.
--- a/x265/build/README.txt
+++ b/x265/build/README.txt
@ -0,0 +1,71 @@
+= Mandatory Prerequisites =
+
+* GCC, MSVC (9, 10, 11, 12), Xcode or Intel C/C++
+* CMake 2.8.8 or later http://www.cmake.org
+* On Linux, ccmake is helpful; it is usually provided by a package named cmake-curses-gui
+
+Note: MSVC12 requires cmake 2.8.11 or later
+
+
+= Optional Prerequisites =
+
+1. Yasm 1.2.0 or later, to compile assembly primitives (performance)
+
+   For Windows, download the latest yasm executable
+   http://yasm.tortall.net/Download.html and copy the EXE into
+   C:\Windows or somewhere else in your %PATH% that a 32-bit app (cmake)
+   can find it. If it is not in the path, you must manually tell cmake
+   where to find it.  Note: you do not need the vsyasm packages, x265
+   does not use them.  You only need the yasm executable itself.
+
+   On Linux, the packaged yasm may be older than 1.2, in which case
+   you will need to get the latest source and build it yourself.
+
+   Once YASM is properly installed, run cmake to regenerate projects. If you
+   do not see the below line in the cmake output, YASM is not in the PATH.
+
+   -- Found Yasm 1.3.0 to build assembly primitives
+
+   Now build the encoder and run x265 -V:
+
+   x265 [info]: using cpu capabilities: MMX, SSE2, ...
+
+   If cpu capabilities line says 'none!', then the encoder was built
+   without yasm.
+
+2. VisualLeakDetector (Windows Only)
+
+   Download from https://vld.codeplex.com/releases and install. You may
+   need to log in again for it to be in your %PATH%.  Cmake will find it
+   and enable leak detection in debug builds without any additional work.
+
+   If VisualLeakDetector is not installed, cmake will complain a bit, but
+   it is completely harmless.
+
+
+= Build Instructions Linux =
+
+1. Use cmake to generate Makefiles: cmake ../source
+2. Build x265:                      make
+
+  Or use our shell script which runs cmake then opens the curses GUI to
+  configure build options
+
+1. cd build/linux ; ./make-Makefiles.bash
+2. make
+
+
+= Build Instructions Windows =
+
+We recommend you use one of the make-solutions.bat files in the appropriate
+build/ sub-folder for your preferred compiler.  They will open the cmake-gui
+to configure build options; click configure until no more red options remain,
+then click generate and exit.  There should now be an x265.sln file in the
+same folder; open this in Visual Studio and build it.
+
+= Version number considerations =
+
+Note that cmake will update X265_VERSION each time cmake runs, if you are
+building out of a Mercurial source repository.  If you are building out of
+a release source package, the version will not change.  If Mercurial is not
+found, the version will be "unknown".
--- a/x265/build/linux/make-Makefiles.bash
+++ b/x265/build/linux/make-Makefiles.bash
@ -0,0 +1,3 @@
+#!/bin/bash
+# Run this from within a bash shell
+cmake -G "Unix Makefiles" ../../source && ccmake ../../source
--- a/x265/build/linux/multilib.sh
+++ b/x265/build/linux/multilib.sh
@ -0,0 +1,41 @@
+#!/bin/sh
+
+mkdir -p 8bit 10bit 12bit
+
+cd 12bit
+cmake ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
+make ${MAKEFLAGS}
+
+cd ../10bit
+cmake ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
+make ${MAKEFLAGS}
+
+cd ../8bit
+ln -sf ../10bit/libx265.a libx265_main10.a
+ln -sf ../12bit/libx265.a libx265_main12.a
+cmake ../../../source -DEXTRA_LIB="x265_main10.a;x265_main12.a" -DEXTRA_LINK_FLAGS=-L. -DLINKED_10BIT=ON -DLINKED_12BIT=ON
+make ${MAKEFLAGS}
+
+# rename the 8bit library, then combine all three into libx265.a
+mv libx265.a libx265_main.a
+
+uname=`uname`
+if [ "$uname" = "Linux" ]
+then
+
+# On Linux, we use GNU ar to combine the static libraries together
+ar -M <<EOF
+CREATE libx265.a
+ADDLIB libx265_main.a
+ADDLIB libx265_main10.a
+ADDLIB libx265_main12.a
+SAVE
+END
+EOF
+
+else
+
+# Mac/BSD libtool
+libtool -static -o libx265.a libx265_main.a libx265_main10.a libx265_main12.a 2>/dev/null
+
+fi
--- a/x265/build/msys/make-Makefiles.sh
+++ b/x265/build/msys/make-Makefiles.sh
@ -0,0 +1,3 @@
+#!/bin/sh
+# Run this from within an MSYS bash shell
+cmake -G "MSYS Makefiles" ../../source && cmake-gui ../../source
--- a/x265/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
+++ b/x265/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
@ -0,0 +1,8 @@
+#!/bin/sh
+
+# This will generate a cross-compile environment, compiling an x86_64
+# Win64 target from a 32bit MinGW32 host environment.  If your MinGW
+# install is 64bit, you can use the native compiler batch file:
+# make-Makefiles.sh
+
+cmake -G "MSYS Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake ../../source && cmake-gui ../../source
--- a/x265/build/msys/multilib.sh
+++ b/x265/build/msys/multilib.sh
@ -0,0 +1,29 @@
+#!/bin/sh
+
+mkdir -p 8bit 10bit 12bit
+
+cd 12bit
+cmake -G "MSYS Makefiles" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
+make ${MAKEFLAGS}
+cp libx265.a ../8bit/libx265_main12.a
+
+cd ../10bit
+cmake -G "MSYS Makefiles" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
+make ${MAKEFLAGS}
+cp libx265.a ../8bit/libx265_main10.a
+
+cd ../8bit
+cmake -G "MSYS Makefiles" ../../../source -DEXTRA_LIB="x265_main10.a;x265_main12.a" -DEXTRA_LINK_FLAGS=-L. -DLINKED_10BIT=ON -DLINKED_12BIT=ON
+make ${MAKEFLAGS}
+
+# rename the 8bit library, then combine all three into libx265.a using GNU ar
+mv libx265.a libx265_main.a
+
+ar -M <<EOF
+CREATE libx265.a
+ADDLIB libx265_main.a
+ADDLIB libx265_main10.a
+ADDLIB libx265_main12.a
+SAVE
+END
+EOF
--- a/x265/build/msys/toolchain-mingw32.cmake
+++ b/x265/build/msys/toolchain-mingw32.cmake
@ -0,0 +1,9 @@
+# Toolchain description for building Windows binaries with the
+# i686-w64-mingw32 tools.
+# Usage: cmake -DCMAKE_TOOLCHAIN_FILE=toolchain-mingw32.cmake
+
+# Target platform (this setting is the important one)
+set(CMAKE_SYSTEM_NAME Windows)
+
+# Cross tools: C, C++ and resource compilers, plus the yasm assembler
+set(CMAKE_C_COMPILER i686-w64-mingw32-gcc)
+set(CMAKE_CXX_COMPILER i686-w64-mingw32-g++)
+set(CMAKE_RC_COMPILER i686-w64-mingw32-windres)
+set(CMAKE_ASM_YASM_COMPILER yasm)
--- a/x265/build/msys/toolchain-x86_64-w64-mingw32.cmake
+++ b/x265/build/msys/toolchain-x86_64-w64-mingw32.cmake
@ -0,0 +1,6 @@
+# Toolchain description for building Windows binaries with the
+# x86_64-w64-mingw32 tools.
+# Usage: cmake -DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake
+
+# Target platform
+set(CMAKE_SYSTEM_NAME Windows)
+
+# Cross tools: C, C++ and resource compilers, ranlib, plus the yasm assembler
+set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
+set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
+set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
+set(CMAKE_RANLIB x86_64-w64-mingw32-ranlib)
+set(CMAKE_ASM_YASM_COMPILER yasm)
--- a/x265/build/vc10-x86/build-all.bat
+++ b/x265/build/vc10-x86/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 10; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS100COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 10 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc10-x86/make-solutions.bat
+++ b/x265/build/vc10-x86/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 10 solution file for this
+:: project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 10" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc10-x86_64/build-all.bat
+++ b/x265/build/vc10-x86_64/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 10; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS100COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 10 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc10-x86_64/make-solutions.bat
+++ b/x265/build/vc10-x86_64/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 10 Win64 solution file for this
+:: project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 10 Win64" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc10-x86_64/multilib.bat
+++ b/x265/build/vc10-x86_64/multilib.bat
@ -0,0 +1,44 @@
+@echo off
+:: Builds the 12-bit, 10-bit and 8-bit x265 static libraries (Release) with
+:: the "Visual Studio 10 Win64" generator in the sub-directories 12bit, 10bit
+:: and 8bit, then merges them into 8bit\Release\x265-static.lib.
+if "%VS100COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 10 not detected"
+  exit 1
+)
+
+call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat"
+
+:: Create the build directories only when missing, so re-runs print no errors
+if not exist 12bit mkdir 12bit
+if not exist 10bit mkdir 10bit
+if not exist 8bit mkdir 8bit
+
+:: 12-bit library; the result is copied next to the 8-bit build
+@cd 12bit
+cmake -G "Visual Studio 10 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
+)
+
+:: 10-bit library; the result is copied next to the 8-bit build
+@cd ..\10bit
+cmake -G "Visual Studio 10 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
+)
+
+:: 8-bit build; stop if either high bit depth library is missing
+@cd ..\8bit
+if not exist x265-static-main10.lib (
+  msg "%username%" "10bit build failed"
+  exit 1
+)
+if not exist x265-static-main12.lib (
+  msg "%username%" "12bit build failed"
+  exit 1
+)
+cmake -G "Visual Studio 10 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  rem Combine the static libraries; warnings 4006 and 4221 are ignored
+  rem because they are caused by the winxp.cpp hacks.
+  move Release\x265-static.lib x265-static-main.lib
+  LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
+)
+
+pause
--- a/x265/build/vc11-x86/build-all.bat
+++ b/x265/build/vc11-x86/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 11; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS110COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 11 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS110COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc11-x86/make-solutions.bat
+++ b/x265/build/vc11-x86/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 11 solution file for this
+:: project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 11" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc11-x86_64/build-all.bat
+++ b/x265/build/vc11-x86_64/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 11; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS110COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 11 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS110COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc11-x86_64/make-solutions.bat
+++ b/x265/build/vc11-x86_64/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 11 Win64 solution file for this
+:: project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 11 Win64" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc11-x86_64/multilib.bat
+++ b/x265/build/vc11-x86_64/multilib.bat
@ -0,0 +1,44 @@
+@echo off
+:: Builds the 12-bit, 10-bit and 8-bit x265 static libraries (Release) with
+:: the "Visual Studio 11 Win64" generator in the sub-directories 12bit, 10bit
+:: and 8bit, then merges them into 8bit\Release\x265-static.lib.
+if "%VS110COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 11 not detected"
+  exit 1
+)
+
+call "%VS110COMNTOOLS%\..\..\VC\vcvarsall.bat"
+
+:: Create the build directories only when missing, so re-runs print no errors
+if not exist 12bit mkdir 12bit
+if not exist 10bit mkdir 10bit
+if not exist 8bit mkdir 8bit
+
+:: 12-bit library; the result is copied next to the 8-bit build
+@cd 12bit
+cmake -G "Visual Studio 11 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
+)
+
+:: 10-bit library; the result is copied next to the 8-bit build
+@cd ..\10bit
+cmake -G "Visual Studio 11 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
+)
+
+:: 8-bit build; stop if either high bit depth library is missing
+@cd ..\8bit
+if not exist x265-static-main10.lib (
+  msg "%username%" "10bit build failed"
+  exit 1
+)
+if not exist x265-static-main12.lib (
+  msg "%username%" "12bit build failed"
+  exit 1
+)
+cmake -G "Visual Studio 11 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  rem Combine the static libraries; warnings 4006 and 4221 are ignored
+  rem because they are caused by the winxp.cpp hacks.
+  move Release\x265-static.lib x265-static-main.lib
+  LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
+)
+
+pause
--- a/x265/build/vc12-x86/build-all.bat
+++ b/x265/build/vc12-x86/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 12; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS120COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 12 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS120COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc12-x86/make-solutions.bat
+++ b/x265/build/vc12-x86/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 12 solution file for this
+:: project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 12" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc12-x86_64/build-all.bat
+++ b/x265/build/vc12-x86_64/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 12; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS120COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 12 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS120COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc12-x86_64/make-solutions.bat
+++ b/x265/build/vc12-x86_64/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 12 Win64 solution file for this
+:: project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 12 Win64" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc12-x86_64/multilib.bat
+++ b/x265/build/vc12-x86_64/multilib.bat
@ -0,0 +1,44 @@
+@echo off
+:: Builds the 12-bit, 10-bit and 8-bit x265 static libraries (Release) with
+:: the "Visual Studio 12 Win64" generator in the sub-directories 12bit, 10bit
+:: and 8bit, then merges them into 8bit\Release\x265-static.lib.
+if "%VS120COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 12 not detected"
+  exit 1
+)
+
+call "%VS120COMNTOOLS%\..\..\VC\vcvarsall.bat"
+
+:: Create the build directories only when missing, so re-runs print no errors
+if not exist 12bit mkdir 12bit
+if not exist 10bit mkdir 10bit
+if not exist 8bit mkdir 8bit
+
+:: 12-bit library; the result is copied next to the 8-bit build
+@cd 12bit
+cmake -G "Visual Studio 12 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
+)
+
+:: 10-bit library; the result is copied next to the 8-bit build
+@cd ..\10bit
+cmake -G "Visual Studio 12 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
+)
+
+:: 8-bit build; stop if either high bit depth library is missing
+@cd ..\8bit
+if not exist x265-static-main10.lib (
+  msg "%username%" "10bit build failed"
+  exit 1
+)
+if not exist x265-static-main12.lib (
+  msg "%username%" "12bit build failed"
+  exit 1
+)
+cmake -G "Visual Studio 12 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  rem Combine the static libraries; warnings 4006 and 4221 are ignored
+  rem because they are caused by the winxp.cpp hacks.
+  move Release\x265-static.lib x265-static-main.lib
+  LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
+)
+
+pause
--- a/x265/build/vc9-x86/build-all.bat
+++ b/x265/build/vc9-x86/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 9; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS90COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 9 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc9-x86/make-solutions.bat
+++ b/x265/build/vc9-x86/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 9 2008 solution file for this
+:: project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 9 2008" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc9-x86_64/build-all.bat
+++ b/x265/build/vc9-x86_64/build-all.bat
@ -0,0 +1,14 @@
+@echo off
+:: Builds the Release, Debug and RelWithDebInfo configurations of x265.sln
+:: with Visual Studio 9; the solution is generated first via
+:: make-solutions.bat when it does not exist yet.
+if "%VS90COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 9 not detected"
+  exit 1
+)
+if not exist x265.sln call make-solutions.bat
+if exist x265.sln (
+  call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat"
+  for %%C in (Release Debug RelWithDebInfo) do MSBuild /property:Configuration="%%C" x265.sln
+)
--- a/x265/build/vc9-x86_64/make-solutions.bat
+++ b/x265/build/vc9-x86_64/make-solutions.bat
@ -0,0 +1,6 @@
+@echo off
+::
+:: Run this batch file to create a Visual Studio 9 2008 Win64 solution file for
+:: this project in the current directory, from the sources in ..\..\source.
+:: If generation succeeds, cmake-gui is opened so build options can be adjusted.
+:: See the cmake documentation for other generator targets.
+::
+cmake -G "Visual Studio 9 2008 Win64" ..\..\source && cmake-gui ..\..\source
--- a/x265/build/vc9-x86_64/multilib.bat
+++ b/x265/build/vc9-x86_64/multilib.bat
@ -0,0 +1,44 @@
+@echo off
+:: Builds the 12-bit, 10-bit and 8-bit x265 static libraries (Release) with
+:: the "Visual Studio 9 2008 Win64" generator in the sub-directories 12bit,
+:: 10bit and 8bit, then merges them into 8bit\Release\x265-static.lib.
+if "%VS90COMNTOOLS%" == "" (
+  msg "%username%" "Visual Studio 9 not detected"
+  exit 1
+)
+
+call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat"
+
+:: Create the build directories only when missing, so re-runs print no errors
+if not exist 12bit mkdir 12bit
+if not exist 10bit mkdir 10bit
+if not exist 8bit mkdir 8bit
+
+:: 12-bit library; the result is copied next to the 8-bit build
+@cd 12bit
+cmake -G "Visual Studio 9 2008 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
+)
+
+:: 10-bit library; the result is copied next to the 8-bit build
+@cd ..\10bit
+cmake -G "Visual Studio 9 2008 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
+)
+
+:: 8-bit build; stop if either high bit depth library is missing
+@cd ..\8bit
+if not exist x265-static-main10.lib (
+  msg "%username%" "10bit build failed"
+  exit 1
+)
+if not exist x265-static-main12.lib (
+  msg "%username%" "12bit build failed"
+  exit 1
+)
+cmake -G "Visual Studio 9 2008 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
+if exist x265.sln (
+  MSBuild /property:Configuration="Release" x265.sln
+  rem Combine the static libraries; warnings 4006 and 4221 are ignored
+  rem because they are caused by the winxp.cpp hacks.
+  move Release\x265-static.lib x265-static-main.lib
+  LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
+)
+
+pause
--- a/x265/build/xcode/make-project.sh
+++ b/x265/build/xcode/make-project.sh
@ -0,0 +1,2 @@
+#!/bin/sh
+cmake -G "Xcode" ../../source && ccmake ../../source
--- a/x265/doc/intra/intra-16x16.txt
+++ b/x265/doc/intra/intra-16x16.txt
@ -0,0 +1,561 @@
+--- 16x16, Mode= 2 [F]---
+[ 0]: Fact= 0:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,            *
+[ 1]: Fact= 0:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,            *
+[ 2]: Fact= 0:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,            *
+[ 3]: Fact= 0:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,            *
+[ 4]: Fact= 0:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,            *
+[ 5]: Fact= 0:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,            *
+[ 6]: Fact= 0:	 -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,            *
+[ 7]: Fact= 0:	 -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,            *
+[ 8]: Fact= 0:	-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,            *
+[ 9]: Fact= 0:	-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,            *
+[10]: Fact= 0:	-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,            *
+[11]: Fact= 0:	-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,            *
+[12]: Fact= 0:	-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,            *
+[13]: Fact= 0:	-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,            *
+[14]: Fact= 0:	-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,            *
+[15]: Fact= 0:	-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,-32,            *
+--- 16x16, Mode= 3 [F]---
+[ 0]: Fact=26:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 1]: Fact=20:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 2]: Fact=14:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 3]: Fact= 8:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[ 4]: Fact= 2:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[ 5]: Fact=28:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[ 6]: Fact=22:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,        *
+[ 7]: Fact=16:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,        *
+[ 8]: Fact=10:	 -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,        *
+[ 9]: Fact= 4:	 -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,        *
+[10]: Fact=30:	 -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,        *
+[11]: Fact=24:	-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,        *
+[12]: Fact=18:	-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,        *
+[13]: Fact=12:	-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,        *
+[14]: Fact= 6:	-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,        *
+[15]: Fact= 0:	-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,            *
+--- 16x16, Mode= 4 [F]---
+[ 0]: Fact=21:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 1]: Fact=10:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 2]: Fact=31:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 3]: Fact=20:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 4]: Fact= 9:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[ 5]: Fact=30:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[ 6]: Fact=19:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[ 7]: Fact= 8:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,        *
+[ 8]: Fact=29:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,        *
+[ 9]: Fact=18:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,        *
+[10]: Fact= 7:	 -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,        *
+[11]: Fact=28:	 -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,        *
+[12]: Fact=17:	 -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,        *
+[13]: Fact= 6:	-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,        *
+[14]: Fact=27:	-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,        *
+[15]: Fact=16:	-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,        *
+--- 16x16, Mode= 5 [F]---
+[ 0]: Fact=17:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 1]: Fact= 2:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 2]: Fact=19:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 3]: Fact= 4:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 4]: Fact=21:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 5]: Fact= 6:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[ 6]: Fact=23:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[ 7]: Fact= 8:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[ 8]: Fact=25:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[ 9]: Fact=10:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,        *
+[10]: Fact=27:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,        *
+[11]: Fact=12:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,        *
+[12]: Fact=29:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,        *
+[13]: Fact=14:	 -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,        *
+[14]: Fact=31:	 -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,        *
+[15]: Fact=16:	 -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,        *
+--- 16x16, Mode= 6 [F]---
+[ 0]: Fact=13:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 1]: Fact=26:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 2]: Fact= 7:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 3]: Fact=20:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 4]: Fact= 1:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 5]: Fact=14:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 6]: Fact=27:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 7]: Fact= 8:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[ 8]: Fact=21:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[ 9]: Fact= 2:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[10]: Fact=15:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[11]: Fact=28:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[12]: Fact= 9:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,        *
+[13]: Fact=22:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,        *
+[14]: Fact= 3:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,        *
+[15]: Fact=16:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,        *
+--- 16x16, Mode= 7 [F]---
+[ 0]: Fact= 9:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 1]: Fact=18:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 2]: Fact=27:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 3]: Fact= 4:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 4]: Fact=13:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 5]: Fact=22:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 6]: Fact=31:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 7]: Fact= 8:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 8]: Fact=17:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[ 9]: Fact=26:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[10]: Fact= 3:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[11]: Fact=12:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[12]: Fact=21:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[13]: Fact=30:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,        *
+[14]: Fact= 7:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+[15]: Fact=16:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,        *
+--- 16x16, Mode= 8 [F]---
+[ 0]: Fact= 5:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 1]: Fact=10:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 2]: Fact=15:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 3]: Fact=20:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 4]: Fact=25:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 5]: Fact=30:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 6]: Fact= 3:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 7]: Fact= 8:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 8]: Fact=13:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[ 9]: Fact=18:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[10]: Fact=23:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[11]: Fact=28:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,        *
+[12]: Fact= 1:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[13]: Fact= 6:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[14]: Fact=11:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+[15]: Fact=16:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,        *
+--- 16x16, Mode= 9 [ ]---
+[ 0]: Fact= 2:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 1]: Fact= 4:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 2]: Fact= 6:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 3]: Fact= 8:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 4]: Fact=10:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 5]: Fact=12:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 6]: Fact=14:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 7]: Fact=16:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 8]: Fact=18:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[ 9]: Fact=20:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[10]: Fact=22:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[11]: Fact=24:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[12]: Fact=26:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[13]: Fact=28:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[14]: Fact=30:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,        *
+[15]: Fact= 0:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,-17,            *
+--- 16x16, Mode=10 [ ]---
+[ 0]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 1]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 2]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 3]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 4]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 5]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 6]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 7]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 8]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[ 9]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[10]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[11]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[12]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[13]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[14]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+[15]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,            *
+--- 16x16, Mode=11 [ ]---
+[ 0]: Fact=30:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 1]: Fact=28:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 2]: Fact=26:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 3]: Fact=24:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 4]: Fact=22:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 5]: Fact=20:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 6]: Fact=18:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 7]: Fact=16:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 8]: Fact=14:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 9]: Fact=12:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[10]: Fact=10:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[11]: Fact= 8:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[12]: Fact= 6:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[13]: Fact= 4:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[14]: Fact= 2:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[15]: Fact= 0:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,            *
+--- 16x16, Mode=12 [F]---
+[ 0]: Fact=27:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 1]: Fact=22:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 2]: Fact=17:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 3]: Fact=12:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 4]: Fact= 7:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 5]: Fact= 2:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 6]: Fact=29:	  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 7]: Fact=24:	  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 8]: Fact=19:	  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 9]: Fact=14:	  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[10]: Fact= 9:	  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[11]: Fact= 4:	  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[12]: Fact=31:	 13,  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[13]: Fact=26:	 13,  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[14]: Fact=21:	 13,  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[15]: Fact=16:	 13,  6,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+--- 16x16, Mode=13 [F]---
+[ 0]: Fact=23:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 1]: Fact=14:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 2]: Fact= 5:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 3]: Fact=28:	  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 4]: Fact=19:	  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 5]: Fact=10:	  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 6]: Fact= 1:	  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 7]: Fact=24:	  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 8]: Fact=15:	  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 9]: Fact= 6:	  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[10]: Fact=29:	 11,  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[11]: Fact=20:	 11,  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[12]: Fact=11:	 11,  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[13]: Fact= 2:	 11,  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[14]: Fact=25:	 14, 11,  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[15]: Fact=16:	 14, 11,  7,  4,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+--- 16x16, Mode=14 [F]---
+[ 0]: Fact=19:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 1]: Fact= 6:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 2]: Fact=25:	  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 3]: Fact=12:	  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 4]: Fact=31:	  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 5]: Fact=18:	  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 6]: Fact= 5:	  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 7]: Fact=24:	  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 8]: Fact=11:	  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 9]: Fact=30:	 10,  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[10]: Fact=17:	 10,  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[11]: Fact= 4:	 10,  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[12]: Fact=23:	 12, 10,  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[13]: Fact=10:	 12, 10,  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[14]: Fact=29:	 15, 12, 10,  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[15]: Fact=16:	 15, 12, 10,  7,  5,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+--- 16x16, Mode=15 [F]---
+[ 0]: Fact=15:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 1]: Fact=30:	  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 2]: Fact=13:	  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 3]: Fact=28:	  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 4]: Fact=11:	  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 5]: Fact=26:	  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 6]: Fact= 9:	  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 7]: Fact=24:	  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 8]: Fact= 7:	  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 9]: Fact=22:	  9,  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[10]: Fact= 5:	  9,  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[11]: Fact=20:	 11,  9,  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[12]: Fact= 3:	 11,  9,  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[13]: Fact=18:	 13, 11,  9,  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[14]: Fact= 1:	 13, 11,  9,  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[15]: Fact=16:	 15, 13, 11,  9,  8,  6,  4,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+--- 16x16, Mode=16 [F]---
+[ 0]: Fact=11:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 1]: Fact=22:	  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 2]: Fact= 1:	  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 3]: Fact=12:	  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 4]: Fact=23:	  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 5]: Fact= 2:	  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 6]: Fact=13:	  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 7]: Fact=24:	  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 8]: Fact= 3:	  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 9]: Fact=14:	  9,  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[10]: Fact=25:	 11,  9,  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[11]: Fact= 4:	 11,  9,  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[12]: Fact=15:	 12, 11,  9,  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[13]: Fact=26:	 14, 12, 11,  9,  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[14]: Fact= 5:	 14, 12, 11,  9,  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[15]: Fact=16:	 15, 14, 12, 11,  9,  8,  6,  5,  3,  2,  0, -1, -2, -3, -4, -5, -6,        *
+--- 16x16, Mode=17 [F]---
+[ 0]: Fact= 6:	  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,-16,        *
+[ 1]: Fact=12:	  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+[ 2]: Fact=18:	  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 3]: Fact=24:	  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 4]: Fact=30:	  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 5]: Fact= 4:	  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 6]: Fact=10:	  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 7]: Fact=16:	  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 8]: Fact=22:	  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 9]: Fact=28:	 10,  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[10]: Fact= 2:	 10,  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[11]: Fact= 8:	 11, 10,  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[12]: Fact=14:	 12, 11, 10,  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5, -6,        *
+[13]: Fact=20:	 14, 12, 11, 10,  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4, -5,        *
+[14]: Fact=26:	 15, 14, 12, 11, 10,  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3, -4,        *
+[15]: Fact= 0:	 15, 14, 12, 11, 10,  9,  7,  6,  5,  4,  2,  1,  0, -1, -2, -3,            *
+--- 16x16, Mode=18 [F]---
+[ 0]: Fact= 0:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,            *
+[ 1]: Fact= 0:	 -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,            *
+[ 2]: Fact= 0:	 -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,            *
+[ 3]: Fact= 0:	 -3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,            *
+[ 4]: Fact= 0:	 -4, -3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,            *
+[ 5]: Fact= 0:	 -5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,            *
+[ 6]: Fact= 0:	 -6, -5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,            *
+[ 7]: Fact= 0:	 -7, -6, -5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 8]: Fact= 0:	 -8, -7, -6, -5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,            *
+[ 9]: Fact= 0:	 -9, -8, -7, -6, -5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5,  6,            *
+[10]: Fact= 0:	-10, -9, -8, -7, -6, -5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5,            *
+[11]: Fact= 0:	-11,-10, -9, -8, -7, -6, -5, -4, -3, -2, -1,  0,  1,  2,  3,  4,            *
+[12]: Fact= 0:	-12,-11,-10, -9, -8, -7, -6, -5, -4, -3, -2, -1,  0,  1,  2,  3,            *
+[13]: Fact= 0:	-13,-12,-11,-10, -9, -8, -7, -6, -5, -4, -3, -2, -1,  0,  1,  2,            *
+[14]: Fact= 0:	-14,-13,-12,-11,-10, -9, -8, -7, -6, -5, -4, -3, -2, -1,  0,  1,            *
+[15]: Fact= 0:	-15,-14,-13,-12,-11,-10, -9, -8, -7, -6, -5, -4, -3, -2, -1,  0,            *
+--- 16x16, Mode=19 [F]---
+[ 0]: Fact= 6:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 1]: Fact=12:	 -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 2]: Fact=18:	 -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 3]: Fact=24:	 -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 4]: Fact=30:	 -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 5]: Fact= 4:	 -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 6]: Fact=10:	 -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 7]: Fact=16:	 -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 8]: Fact=22:	 -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 9]: Fact=28:	-10, -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[10]: Fact= 2:	-10, -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[11]: Fact= 8:	-11,-10, -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[12]: Fact=14:	-12,-11,-10, -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,  6,        *
+[13]: Fact=20:	-14,-12,-11,-10, -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,  5,        *
+[14]: Fact=26:	-15,-14,-12,-11,-10, -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,  4,        *
+[15]: Fact= 0:	-15,-14,-12,-11,-10, -9, -7, -6, -5, -4, -2, -1,  0,  1,  2,  3,            *
+--- 16x16, Mode=20 [F]---
+[ 0]: Fact=11:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 1]: Fact=22:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 2]: Fact= 1:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 3]: Fact=12:	 -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 4]: Fact=23:	 -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 5]: Fact= 2:	 -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 6]: Fact=13:	 -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 7]: Fact=24:	 -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 8]: Fact= 3:	 -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 9]: Fact=14:	 -9, -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[10]: Fact=25:	-11, -9, -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[11]: Fact= 4:	-11, -9, -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[12]: Fact=15:	-12,-11, -9, -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[13]: Fact=26:	-14,-12,-11, -9, -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[14]: Fact= 5:	-14,-12,-11, -9, -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[15]: Fact=16:	-15,-14,-12,-11, -9, -8, -6, -5, -3, -2,  0,  1,  2,  3,  4,  5,  6,        *
+--- 16x16, Mode=21 [F]---
+[ 0]: Fact=15:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 1]: Fact=30:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 2]: Fact=13:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 3]: Fact=28:	 -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 4]: Fact=11:	 -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 5]: Fact=26:	 -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 6]: Fact= 9:	 -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 7]: Fact=24:	 -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 8]: Fact= 7:	 -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 9]: Fact=22:	 -9, -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[10]: Fact= 5:	 -9, -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[11]: Fact=20:	-11, -9, -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[12]: Fact= 3:	-11, -9, -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[13]: Fact=18:	-13,-11, -9, -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[14]: Fact= 1:	-13,-11, -9, -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[15]: Fact=16:	-15,-13,-11, -9, -8, -6, -4, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+--- 16x16, Mode=22 [F]---
+[ 0]: Fact=19:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 1]: Fact= 6:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 2]: Fact=25:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 3]: Fact=12:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 4]: Fact=31:	 -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 5]: Fact=18:	 -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 6]: Fact= 5:	 -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 7]: Fact=24:	 -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 8]: Fact=11:	 -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 9]: Fact=30:	-10, -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[10]: Fact=17:	-10, -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[11]: Fact= 4:	-10, -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[12]: Fact=23:	-12,-10, -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[13]: Fact=10:	-12,-10, -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[14]: Fact=29:	-15,-12,-10, -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[15]: Fact=16:	-15,-12,-10, -7, -5, -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+--- 16x16, Mode=23 [F]---
+[ 0]: Fact=23:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 1]: Fact=14:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 2]: Fact= 5:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 3]: Fact=28:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 4]: Fact=19:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 5]: Fact=10:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 6]: Fact= 1:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 7]: Fact=24:	 -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 8]: Fact=15:	 -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 9]: Fact= 6:	 -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[10]: Fact=29:	-11, -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[11]: Fact=20:	-11, -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[12]: Fact=11:	-11, -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[13]: Fact= 2:	-11, -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[14]: Fact=25:	-14,-11, -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[15]: Fact=16:	-14,-11, -7, -4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+--- 16x16, Mode=24 [F]---
+[ 0]: Fact=27:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 1]: Fact=22:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 2]: Fact=17:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 3]: Fact=12:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 4]: Fact= 7:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 5]: Fact= 2:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 6]: Fact=29:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 7]: Fact=24:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 8]: Fact=19:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[ 9]: Fact=14:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[10]: Fact= 9:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[11]: Fact= 4:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+[12]: Fact=31:	-13, -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[13]: Fact=26:	-13, -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[14]: Fact=21:	-13, -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[15]: Fact=16:	-13, -6,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+--- 16x16, Mode=25 [ ]---
+[ 0]: Fact=30:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 1]: Fact=28:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 2]: Fact=26:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 3]: Fact=24:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 4]: Fact=22:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 5]: Fact=20:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 6]: Fact=18:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 7]: Fact=16:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 8]: Fact=14:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[ 9]: Fact=12:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[10]: Fact=10:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[11]: Fact= 8:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[12]: Fact= 6:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[13]: Fact= 4:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[14]: Fact= 2:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,        *
+[15]: Fact= 0:	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,            *
+--- 16x16, Mode=26 [ ]---
+[ 0]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 1]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 2]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 3]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 4]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 5]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 6]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 7]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 8]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[ 9]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[10]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[11]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[12]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[13]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[14]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+[15]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,            *
+--- 16x16, Mode=27 [ ]---
+[ 0]: Fact= 2:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 1]: Fact= 4:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 2]: Fact= 6:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 3]: Fact= 8:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 4]: Fact=10:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 5]: Fact=12:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 6]: Fact=14:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 7]: Fact=16:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 8]: Fact=18:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 9]: Fact=20:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[10]: Fact=22:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[11]: Fact=24:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[12]: Fact=26:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[13]: Fact=28:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[14]: Fact=30:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[15]: Fact= 0:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,            *
+--- 16x16, Mode=28 [F]---
+[ 0]: Fact= 5:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 1]: Fact=10:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 2]: Fact=15:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 3]: Fact=20:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 4]: Fact=25:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 5]: Fact=30:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 6]: Fact= 3:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 7]: Fact= 8:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 8]: Fact=13:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 9]: Fact=18:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[10]: Fact=23:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[11]: Fact=28:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[12]: Fact= 1:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[13]: Fact= 6:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[14]: Fact=11:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[15]: Fact=16:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+--- 16x16, Mode=29 [F]---
+[ 0]: Fact= 9:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 1]: Fact=18:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 2]: Fact=27:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 3]: Fact= 4:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 4]: Fact=13:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 5]: Fact=22:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 6]: Fact=31:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 7]: Fact= 8:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 8]: Fact=17:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 9]: Fact=26:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[10]: Fact= 3:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[11]: Fact=12:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[12]: Fact=21:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[13]: Fact=30:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[14]: Fact= 7:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[15]: Fact=16:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+--- 16x16, Mode=30 [F]---
+[ 0]: Fact=13:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 1]: Fact=26:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 2]: Fact= 7:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 3]: Fact=20:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 4]: Fact= 1:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 5]: Fact=14:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 6]: Fact=27:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 7]: Fact= 8:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[ 8]: Fact=21:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[ 9]: Fact= 2:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[10]: Fact=15:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[11]: Fact=28:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[12]: Fact= 9:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,        *
+[13]: Fact=22:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,        *
+[14]: Fact= 3:	  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,        *
+[15]: Fact=16:	  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,        *
+--- 16x16, Mode=31 [F]---
+[ 0]: Fact=17:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 1]: Fact= 2:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 2]: Fact=19:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 3]: Fact= 4:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 4]: Fact=21:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 5]: Fact= 6:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[ 6]: Fact=23:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[ 7]: Fact= 8:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[ 8]: Fact=25:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[ 9]: Fact=10:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,        *
+[10]: Fact=27:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,        *
+[11]: Fact=12:	  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,        *
+[12]: Fact=29:	  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,        *
+[13]: Fact=14:	  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,        *
+[14]: Fact=31:	  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,        *
+[15]: Fact=16:	  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,        *
+--- 16x16, Mode=32 [F]---
+[ 0]: Fact=21:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 1]: Fact=10:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 2]: Fact=31:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 3]: Fact=20:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 4]: Fact= 9:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[ 5]: Fact=30:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[ 6]: Fact=19:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[ 7]: Fact= 8:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,        *
+[ 8]: Fact=29:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,        *
+[ 9]: Fact=18:	  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,        *
+[10]: Fact= 7:	  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,        *
+[11]: Fact=28:	  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,        *
+[12]: Fact=17:	  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,        *
+[13]: Fact= 6:	 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,        *
+[14]: Fact=27:	 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,        *
+[15]: Fact=16:	 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,        *
+--- 16x16, Mode=33 [F]---
+[ 0]: Fact=26:	  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,        *
+[ 1]: Fact=20:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,        *
+[ 2]: Fact=14:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,        *
+[ 3]: Fact= 8:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,        *
+[ 4]: Fact= 2:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[ 5]: Fact=28:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,        *
+[ 6]: Fact=22:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,        *
+[ 7]: Fact=16:	  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,        *
+[ 8]: Fact=10:	  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,        *
+[ 9]: Fact= 4:	  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,        *
+[10]: Fact=30:	  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,        *
+[11]: Fact=24:	 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,        *
+[12]: Fact=18:	 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,        *
+[13]: Fact=12:	 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,        *
+[14]: Fact= 6:	 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,        *
+[15]: Fact= 0:	 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,            *
+--- 16x16, Mode=34 [F]---
+[ 0]: Fact= 0:	  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,            *
+[ 1]: Fact= 0:	  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,            *
+[ 2]: Fact= 0:	  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,            *
+[ 3]: Fact= 0:	  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,            *
+[ 4]: Fact= 0:	  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,            *
+[ 5]: Fact= 0:	  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,            *
+[ 6]: Fact= 0:	  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,            *
+[ 7]: Fact= 0:	  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,            *
+[ 8]: Fact= 0:	 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,            *
+[ 9]: Fact= 0:	 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,            *
+[10]: Fact= 0:	 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,            *
+[11]: Fact= 0:	 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,            *
+[12]: Fact= 0:	 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,            *
+[13]: Fact= 0:	 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,            *
+[14]: Fact= 0:	 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,            *
+[15]: Fact= 0:	 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,            *
--- a/x265/doc/intra/intra-32x32.txt
+++ b/x265/doc/intra/intra-32x32.txt
--- a/x265/doc/intra/intra-4x4.txt
+++ b/x265/doc/intra/intra-4x4.txt
@ -0,0 +1,166 @@
+--- 4x4, Mode= 2 --- [refPix]
+[ 0]: Fact= 0:   10, 11, 12, 13                 *
+[ 1]: Fact= 0:   11, 12, 13, 14                 *
+[ 2]: Fact= 0:   12, 13, 14, 15                 *
+[ 3]: Fact= 0:   13, 14, 15, 16                 *
+--- 4x4, Mode= 3 ---
+[ 0]: Fact=26:    9, 10, 11, 12, 13             *
+[ 1]: Fact=20:   10, 11, 12, 13, 14             *
+[ 2]: Fact=14:   11, 12, 13, 14, 15             *
+[ 3]: Fact= 8:   12, 13, 14, 15, 16             *
+--- 4x4, Mode= 4 ---
+[ 0]: Fact=21:    9, 10, 11, 12, 13             *
+[ 1]: Fact=10:   10, 11, 12, 13, 14             *
+[ 2]: Fact=31:   10, 11, 12, 13, 14             *
+[ 3]: Fact=20:   11, 12, 13, 14, 15             *
+--- 4x4, Mode= 5 ---
+[ 0]: Fact=17:    9, 10, 11, 12, 13             *
+[ 1]: Fact= 2:   10, 11, 12, 13, 14             *
+[ 2]: Fact=19:   10, 11, 12, 13, 14             *
+[ 3]: Fact= 4:   11, 12, 13, 14, 15             *
+--- 4x4, Mode= 6 ---
+[ 0]: Fact=13:    9, 10, 11, 12, 13             *
+[ 1]: Fact=26:    9, 10, 11, 12, 13             *
+[ 2]: Fact= 7:   10, 11, 12, 13, 14             *
+[ 3]: Fact=20:   10, 11, 12, 13, 14             *
+--- 4x4, Mode= 7 ---
+[ 0]: Fact= 9:    9, 10, 11, 12, 13             *
+[ 1]: Fact=18:    9, 10, 11, 12, 13             *
+[ 2]: Fact=27:    9, 10, 11, 12, 13             *
+[ 3]: Fact= 4:   10, 11, 12, 13, 14             *
+--- 4x4, Mode= 8 ---
+[ 0]: Fact= 5:    9, 10, 11, 12, 13             *
+[ 1]: Fact=10:    9, 10, 11, 12, 13             *
+[ 2]: Fact=15:    9, 10, 11, 12, 13             *
+[ 3]: Fact=20:    9, 10, 11, 12, 13             *
+--- 4x4, Mode= 9 ---
+[ 0]: Fact= 2:    9, 10, 11, 12, 13             *
+[ 1]: Fact= 4:    9, 10, 11, 12, 13             *
+[ 2]: Fact= 6:    9, 10, 11, 12, 13             *
+[ 3]: Fact= 8:    9, 10, 11, 12, 13             *
+--- 4x4, Mode=10 --- filtPix
+[ 0]: Fact= 0:    9, 10, 11, 12                 *
+[ 1]: Fact= 0:    9, 10, 11, 12                 *
+[ 2]: Fact= 0:    9, 10, 11, 12                 *
+[ 3]: Fact= 0:    9, 10, 11, 12                 *
+--- 4x4, Mode=11 ---
+[ 0]: Fact=30:    0, 9, 10, 11, 12              *
+[ 1]: Fact=28:    0, 9, 10, 11, 12              *
+[ 2]: Fact=26:    0, 9, 10, 11, 12              *
+[ 3]: Fact=24:    0, 9, 10, 11, 12              *
+--- 4x4, Mode=12 ---
+[ 0]: Fact=27:    0, 9, 10, 11, 12              *
+[ 1]: Fact=22:    0, 9, 10, 11, 12              *
+[ 2]: Fact=17:    0, 9, 10, 11, 12              *
+[ 3]: Fact=12:    0, 9, 10, 11, 12              *
+--- 4x4, Mode=13 ---
+[ 0]: Fact=23:    0, 9, 10, 11, 12              *
+[ 1]: Fact=14:    0, 9, 10, 11, 12              *
+[ 2]: Fact= 5:    0, 9, 10, 11, 12              *
+[ 3]: Fact=28:    4, 0,  9, 10, 11              *
+--- 4x4, Mode=14 ---
+[ 0]: Fact=19:    0, 9, 10, 11, 12              *
+[ 1]: Fact= 6:    0, 9, 10, 11, 12              *
+[ 2]: Fact=25:    2, 0,  9, 10, 11              *
+[ 3]: Fact=12:    2, 0,  9, 10, 11              *
+--- 4x4, Mode=15 ---
+[ 0]: Fact=15:    0, 9, 10, 11, 12              *
+[ 1]: Fact=30:    2, 0,  9, 10, 11              *
+[ 2]: Fact=13:    2, 0,  9, 10, 11              *
+[ 3]: Fact=28:    4, 2,  0,  9, 10              *
+--- 4x4, Mode=16 ---
+[ 0]: Fact=11:    0,  9, 10, 11, 12             *
+[ 1]: Fact=22:    2,  0, 9,  10, 11             *
+[ 2]: Fact= 1:    2,  0, 9,  10, 11             *
+[ 3]: Fact=12:    3,  2, 0,  9,  10             *
+--- 4x4, Mode=17 ---
+[ 0]: Fact= 6:    0,  9, 10, 11, 12             *
+[ 1]: Fact=12:    1,  0,  9, 10, 11             *
+[ 2]: Fact=18:    2,  1,  0,  9, 10             *
+[ 3]: Fact=24:    4,  2,  1,  0,  9             *
+--- 4x4, Mode=18 ---
+[ 0]: Fact= 0:    0,  1,  2,  3                 *
+[ 1]: Fact= 0:    9,  0,  1,  2                 *
+[ 2]: Fact= 0:    10, 9,  0,  1                 *
+[ 3]: Fact= 0:    11, 10, 9,  0                 *
+--- 4x4, Mode=19 ---
+[ 0]: Fact= 6:    0,  1,  2,  3,  4             *
+[ 1]: Fact=12:    9,  0,  1,  2,  3             *
+[ 2]: Fact=18:    10, 9,  0,  1,  2             *
+[ 3]: Fact=24:    12, 10, 9,  0,  1             *
+--- 4x4, Mode=20 ---
+[ 0]: Fact=11:    0,  1,  2,  3,  4             *
+[ 1]: Fact=22:    10, 0,  1,  2,  3             *
+[ 2]: Fact= 1:    10, 0,  1,  2,  3             *
+[ 3]: Fact=12:    11, 10, 0,  1,  2             *
+--- 4x4, Mode=21 ---
+[ 0]: Fact=15:    0,  1,  2,  3,  4             *
+[ 1]: Fact=30:    10, 0,  1,  2,  3             *
+[ 2]: Fact=13:    10, 0,  1,  2,  3             *
+[ 3]: Fact=28:    12, 10, 0,  1,  2             *
+--- 4x4, Mode=22 ---
+[ 0]: Fact=19:    0,  1,  2,  3,  4             *
+[ 1]: Fact= 6:    0,  1,  2,  3,  4             *
+[ 2]: Fact=25:    10, 0,  1,  2,  3             *
+[ 3]: Fact=12:    10, 0,  1,  2,  3             *
+--- 4x4, Mode=23 ---
+[ 0]: Fact=23:    0,  1,  2,  3,  4             *
+[ 1]: Fact=14:    0,  1,  2,  3,  4             *
+[ 2]: Fact= 5:    0,  1,  2,  3,  4             *
+[ 3]: Fact=28:    12, 0,  1,  2,  3             *
+--- 4x4, Mode=24 ---
+[ 0]: Fact=27:    0,  1,  2,  3,  4             *
+[ 1]: Fact=22:    0,  1,  2,  3,  4             *
+[ 2]: Fact=17:    0,  1,  2,  3,  4             *
+[ 3]: Fact=12:    0,  1,  2,  3,  4             *
+--- 4x4, Mode=25 ---
+[ 0]: Fact=30:    0,  1,  2,  3,  4             *
+[ 1]: Fact=28:    0,  1,  2,  3,  4             *
+[ 2]: Fact=26:    0,  1,  2,  3,  4             *
+[ 3]: Fact=24:    0,  1,  2,  3,  4             *
+--- 4x4, Mode=26 ---
+[ 0]: Fact= 0:    1,  2,  3,  4,                *
+[ 1]: Fact= 0:    1,  2,  3,  4,                *
+[ 2]: Fact= 0:    1,  2,  3,  4,                *
+[ 3]: Fact= 0:    1,  2,  3,  4,                *
+--- 4x4, Mode=27 ---
+[ 0]: Fact= 2:    1,  2,  3,  4,  5,            *
+[ 1]: Fact= 4:    1,  2,  3,  4,  5,            *
+[ 2]: Fact= 6:    1,  2,  3,  4,  5,            *
+[ 3]: Fact= 8:    1,  2,  3,  4,  5,            *
+--- 4x4, Mode=28 ---
+[ 0]: Fact= 5:    1,  2,  3,  4,  5,            *
+[ 1]: Fact=10:    1,  2,  3,  4,  5,            *
+[ 2]: Fact=15:    1,  2,  3,  4,  5,            *
+[ 3]: Fact=20:    1,  2,  3,  4,  5,            *
+--- 4x4, Mode=29 ---
+[ 0]: Fact= 9:    1,  2,  3,  4,  5,            *
+[ 1]: Fact=18:    1,  2,  3,  4,  5,            *
+[ 2]: Fact=27:    1,  2,  3,  4,  5,            *
+[ 3]: Fact= 4:    2,  3,  4,  5,  6,            *
+--- 4x4, Mode=30 ---
+[ 0]: Fact=13:    1,  2,  3,  4,  5,            *
+[ 1]: Fact=26:    1,  2,  3,  4,  5,            *
+[ 2]: Fact= 7:    2,  3,  4,  5,  6,            *
+[ 3]: Fact=20:    2,  3,  4,  5,  6,            *
+--- 4x4, Mode=31 ---
+[ 0]: Fact=17:    1,  2,  3,  4,  5,            *
+[ 1]: Fact= 2:    2,  3,  4,  5,  6,            *
+[ 2]: Fact=19:    2,  3,  4,  5,  6,            *
+[ 3]: Fact= 4:    3,  4,  5,  6,  7,            *
+--- 4x4, Mode=32 ---
+[ 0]: Fact=21:    1,  2,  3,  4,  5,            *
+[ 1]: Fact=10:    2,  3,  4,  5,  6,            *
+[ 2]: Fact=31:    2,  3,  4,  5,  6,            *
+[ 3]: Fact=20:    3,  4,  5,  6,  7,            *
+--- 4x4, Mode=33 ---
+[ 0]: Fact=26:    1,  2,  3,  4,  5,            *
+[ 1]: Fact=20:    2,  3,  4,  5,  6,            *
+[ 2]: Fact=14:    3,  4,  5,  6,  7,            *
+[ 3]: Fact= 8:    4,  5,  6,  7,  8,            *
+--- 4x4, Mode=34 ---
+[ 0]: Fact= 0:    2,  3,  4,  5,                *
+[ 1]: Fact= 0:    3,  4,  5,  6,                *
+[ 2]: Fact= 0:    4,  5,  6,  7,                *
+[ 3]: Fact= 0:    5,  6,  7,  8,                *
+
--- a/x265/doc/intra/intra-8x8.txt
+++ b/x265/doc/intra/intra-8x8.txt
@ -0,0 +1,298 @@
+--- 8x8, Mode= 2 ---
+[ 0]: Fact= 0:	 -2, -3, -4, -5, -6, -7, -8, -9,            *
+[ 1]: Fact= 0:	 -3, -4, -5, -6, -7, -8, -9,-10,            *
+[ 2]: Fact= 0:	 -4, -5, -6, -7, -8, -9,-10,-11,            *
+[ 3]: Fact= 0:	 -5, -6, -7, -8, -9,-10,-11,-12,            *
+[ 4]: Fact= 0:	 -6, -7, -8, -9,-10,-11,-12,-13,            *
+[ 5]: Fact= 0:	 -7, -8, -9,-10,-11,-12,-13,-14,            *
+[ 6]: Fact= 0:	 -8, -9,-10,-11,-12,-13,-14,-15,            *
+[ 7]: Fact= 0:	 -9,-10,-11,-12,-13,-14,-15,-16,            *
+--- 8x8, Mode= 3 ---                                        
+[ 0]: Fact=26:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 1]: Fact=20:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 2]: Fact=14:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 3]: Fact= 8:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 4]: Fact= 2:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 5]: Fact=28:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 6]: Fact=22:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+[ 7]: Fact=16:	 -7, -8, -9,-10,-11,-12,-13,-14,-15,        *
+--- 8x8, Mode= 4 ---
+[ 0]: Fact=21:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 1]: Fact=10:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 2]: Fact=31:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 3]: Fact=20:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 4]: Fact= 9:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 5]: Fact=30:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 6]: Fact=19:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+[ 7]: Fact= 8:	 -6, -7, -8, -9,-10,-11,-12,-13,-14,        *
+--- 8x8, Mode= 5 ---
+[ 0]: Fact=17:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 1]: Fact= 2:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 2]: Fact=19:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 3]: Fact= 4:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 4]: Fact=21:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 5]: Fact= 6:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 6]: Fact=23:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+[ 7]: Fact= 8:	 -5, -6, -7, -8, -9,-10,-11,-12,-13,        *
+--- 8x8, Mode= 6 ---
+[ 0]: Fact=13:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 1]: Fact=26:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 2]: Fact= 7:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 3]: Fact=20:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 4]: Fact= 1:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 5]: Fact=14:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 6]: Fact=27:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+[ 7]: Fact= 8:	 -4, -5, -6, -7, -8, -9,-10,-11,-12,        *
+--- 8x8, Mode= 7 ---
+[ 0]: Fact= 9:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 1]: Fact=18:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 2]: Fact=27:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 3]: Fact= 4:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 4]: Fact=13:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 5]: Fact=22:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 6]: Fact=31:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 7]: Fact= 8:	 -3, -4, -5, -6, -7, -8, -9,-10,-11,        *
+--- 8x8, Mode= 8 ---
+[ 0]: Fact= 5:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 1]: Fact=10:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 2]: Fact=15:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 3]: Fact=20:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 4]: Fact=25:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 5]: Fact=30:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 6]: Fact= 3:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+[ 7]: Fact= 8:	 -2, -3, -4, -5, -6, -7, -8, -9,-10,        *
+--- 8x8, Mode= 9 ---
+[ 0]: Fact= 2:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 1]: Fact= 4:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 2]: Fact= 6:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 3]: Fact= 8:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 4]: Fact=10:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 5]: Fact=12:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 6]: Fact=14:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+[ 7]: Fact=16:	 -1, -2, -3, -4, -5, -6, -7, -8, -9,        *
+--- 8x8, Mode=10 ---
+[ 0]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+[ 1]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+[ 2]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+[ 3]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+[ 4]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+[ 5]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+[ 6]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+[ 7]: Fact= 0:	 -1, -2, -3, -4, -5, -6, -7, -8,            *
+--- 8x8, Mode=11 ---
+[ 0]: Fact=30:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 1]: Fact=28:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 2]: Fact=26:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 3]: Fact=24:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 4]: Fact=22:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 5]: Fact=20:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 6]: Fact=18:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 7]: Fact=16:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+--- 8x8, Mode=12 ---
+[ 0]: Fact=27:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 1]: Fact=22:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 2]: Fact=17:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 3]: Fact=12:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 4]: Fact= 7:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 5]: Fact= 2:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 6]: Fact=29:	  6,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 7]: Fact=24:	  6,  0, -1, -2, -3, -4, -5, -6, -7,        *
+--- 8x8, Mode=13 ---
+[ 0]: Fact=23:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 1]: Fact=14:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 2]: Fact= 5:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 3]: Fact=28:	  4,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 4]: Fact=19:	  4,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 5]: Fact=10:	  4,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 6]: Fact= 1:	  4,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 7]: Fact=24:	  7,  4,  0, -1, -2, -3, -4, -5, -6,        *
+--- 8x8, Mode=14 ---
+[ 0]: Fact=19:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 1]: Fact= 6:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 2]: Fact=25:	  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 3]: Fact=12:	  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 4]: Fact=31:	  5,  2,  0, -1, -2, -3, -4, -5, -6,        *
+[ 5]: Fact=18:	  5,  2,  0, -1, -2, -3, -4, -5, -6,        *
+[ 6]: Fact= 5:	  5,  2,  0, -1, -2, -3, -4, -5, -6,        *
+[ 7]: Fact=24:	  7,  5,  2,  0, -1, -2, -3, -4, -5,        *
+--- 8x8, Mode=15 ---
+[ 0]: Fact=15:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 1]: Fact=30:	  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 2]: Fact=13:	  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 3]: Fact=28:	  4,  2,  0, -1, -2, -3, -4, -5, -6,        *
+[ 4]: Fact=11:	  4,  2,  0, -1, -2, -3, -4, -5, -6,        *
+[ 5]: Fact=26:	  6,  4,  2,  0, -1, -2, -3, -4, -5,        *
+[ 6]: Fact= 9:	  6,  4,  2,  0, -1, -2, -3, -4, -5,        *
+[ 7]: Fact=24:	  8,  6,  4,  2,  0, -1, -2, -3, -4,        *
+--- 8x8, Mode=16 ---
+[ 0]: Fact=11:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        *
+[ 1]: Fact=22:	  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 2]: Fact= 1:	  2,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 3]: Fact=12:	  3,  2,  0, -1, -2, -3, -4, -5, -6,        *
+[ 4]: Fact=23:	  5,  3,  2,  0, -1, -2, -3, -4, -5,        *
+[ 5]: Fact= 2:	  5,  3,  2,  0, -1, -2, -3, -4, -5,        *
+[ 6]: Fact=13:	  6,  5,  3,  2,  0, -1, -2, -3, -4,        *
+[ 7]: Fact=24:	  8,  6,  5,  3,  2,  0, -1, -2, -3,        *
+--- 8x8, Mode=17 ---
+[ 0]: Fact= 6:	  0, -1, -2, -3, -4, -5, -6, -7, -8,        x
+[ 1]: Fact=12:	  1,  0, -1, -2, -3, -4, -5, -6, -7,        *
+[ 2]: Fact=18:	  2,  1,  0, -1, -2, -3, -4, -5, -6,        *
+[ 3]: Fact=24:	  4,  2,  1,  0, -1, -2, -3, -4, -5,        *
+[ 4]: Fact=30:	  5,  4,  2,  1,  0, -1, -2, -3, -4,        *
+[ 5]: Fact= 4:	  5,  4,  2,  1,  0, -1, -2, -3, -4,        *
+[ 6]: Fact=10:	  6,  5,  4,  2,  1,  0, -1, -2, -3,        *
+[ 7]: Fact=16:	  7,  6,  5,  4,  2,  1,  0, -1, -2,        *
+--- 8x8, Mode=18 ---
+[ 0]: Fact= 0:	  0,  1,  2,  3,  4,  5,  6,  7,            *
+[ 1]: Fact= 0:	 -1,  0,  1,  2,  3,  4,  5,  6,            *
+[ 2]: Fact= 0:	 -2, -1,  0,  1,  2,  3,  4,  5,            *
+[ 3]: Fact= 0:	 -3, -2, -1,  0,  1,  2,  3,  4,            *
+[ 4]: Fact= 0:	 -4, -3, -2, -1,  0,  1,  2,  3,            *
+[ 5]: Fact= 0:	 -5, -4, -3, -2, -1,  0,  1,  2,            *
+[ 6]: Fact= 0:	 -6, -5, -4, -3, -2, -1,  0,  1,            *
+[ 7]: Fact= 0:	 -7, -6, -5, -4, -3, -2, -1,  0,            *
+--- 8x8, Mode=19 ---
+[ 0]: Fact= 6:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        x
+[ 1]: Fact=12:	 -1,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 2]: Fact=18:	 -2, -1,  0,  1,  2,  3,  4,  5,  6,        *
+[ 3]: Fact=24:	 -4, -2, -1,  0,  1,  2,  3,  4,  5,        *
+[ 4]: Fact=30:	 -5, -4, -2, -1,  0,  1,  2,  3,  4,        *
+[ 5]: Fact= 4:	 -5, -4, -2, -1,  0,  1,  2,  3,  4,        *
+[ 6]: Fact=10:	 -6, -5, -4, -2, -1,  0,  1,  2,  3,        *
+[ 7]: Fact=16:	 -7, -6, -5, -4, -2, -1,  0,  1,  2,        *
+--- 8x8, Mode=20 ---
+[ 0]: Fact=11:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 1]: Fact=22:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 2]: Fact= 1:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 3]: Fact=12:	 -3, -2,  0,  1,  2,  3,  4,  5,  6,        *
+[ 4]: Fact=23:	 -5, -3, -2,  0,  1,  2,  3,  4,  5,        *
+[ 5]: Fact= 2:	 -5, -3, -2,  0,  1,  2,  3,  4,  5,        *
+[ 6]: Fact=13:	 -6, -5, -3, -2,  0,  1,  2,  3,  4,        *
+[ 7]: Fact=24:	 -8, -6, -5, -3, -2,  0,  1,  2,  3,        *
+--- 8x8, Mode=21 ---
+[ 0]: Fact=15:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 1]: Fact=30:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 2]: Fact=13:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 3]: Fact=28:	 -4, -2,  0,  1,  2,  3,  4,  5,  6,        *
+[ 4]: Fact=11:	 -4, -2,  0,  1,  2,  3,  4,  5,  6,        *
+[ 5]: Fact=26:	 -6, -4, -2,  0,  1,  2,  3,  4,  5,        *
+[ 6]: Fact= 9:	 -6, -4, -2,  0,  1,  2,  3,  4,  5,        *
+[ 7]: Fact=24:	 -8, -6, -4, -2,  0,  1,  2,  3,  4,        *
+--- 8x8, Mode=22 ---
+[ 0]: Fact=19:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 1]: Fact= 6:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 2]: Fact=25:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 3]: Fact=12:	 -2,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 4]: Fact=31:	 -5, -2,  0,  1,  2,  3,  4,  5,  6,        *
+[ 5]: Fact=18:	 -5, -2,  0,  1,  2,  3,  4,  5,  6,        *
+[ 6]: Fact= 5:	 -5, -2,  0,  1,  2,  3,  4,  5,  6,        *
+[ 7]: Fact=24:	 -7, -5, -2,  0,  1,  2,  3,  4,  5,        *
+--- 8x8, Mode=23 ---
+[ 0]: Fact=23:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 1]: Fact=14:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 2]: Fact= 5:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 3]: Fact=28:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 4]: Fact=19:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 5]: Fact=10:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 6]: Fact= 1:	 -4,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 7]: Fact=24:	 -7, -4,  0,  1,  2,  3,  4,  5,  6,        *
+--- 8x8, Mode=24 ---
+[ 0]: Fact=27:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 1]: Fact=22:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 2]: Fact=17:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 3]: Fact=12:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 4]: Fact= 7:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 5]: Fact= 2:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 6]: Fact=29:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,        *
+[ 7]: Fact=24:	 -6,  0,  1,  2,  3,  4,  5,  6,  7,        *
+--- 8x8, Mode=25 ---
+[ 0]: Fact=30:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 1]: Fact=28:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 2]: Fact=26:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 3]: Fact=24:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 4]: Fact=22:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 5]: Fact=20:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 6]: Fact=18:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+[ 7]: Fact=16:	  0,  1,  2,  3,  4,  5,  6,  7,  8,        *
+--- 8x8, Mode=26 ---
+[ 0]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 1]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 2]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 3]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 4]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 5]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 6]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+[ 7]: Fact= 0:	  1,  2,  3,  4,  5,  6,  7,  8,            *
+--- 8x8, Mode=27 ---
+[ 0]: Fact= 2:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 1]: Fact= 4:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 2]: Fact= 6:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 3]: Fact= 8:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 4]: Fact=10:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 5]: Fact=12:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 6]: Fact=14:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 7]: Fact=16:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+--- 8x8, Mode=28 ---
+[ 0]: Fact= 5:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 1]: Fact=10:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 2]: Fact=15:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 3]: Fact=20:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 4]: Fact=25:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 5]: Fact=30:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 6]: Fact= 3:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 7]: Fact= 8:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+--- 8x8, Mode=29 ---
+[ 0]: Fact= 9:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 1]: Fact=18:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 2]: Fact=27:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 3]: Fact= 4:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 4]: Fact=13:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 5]: Fact=22:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 6]: Fact=31:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 7]: Fact= 8:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+--- 8x8, Mode=30 ---
+[ 0]: Fact=13:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 1]: Fact=26:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 2]: Fact= 7:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 3]: Fact=20:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 4]: Fact= 1:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 5]: Fact=14:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 6]: Fact=27:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 7]: Fact= 8:	  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+--- 8x8, Mode=31 ---
+[ 0]: Fact=17:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 1]: Fact= 2:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 2]: Fact=19:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 3]: Fact= 4:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 4]: Fact=21:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 5]: Fact= 6:	  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 6]: Fact=23:	  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 7]: Fact= 8:	  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+--- 8x8, Mode=32 ---
+[ 0]: Fact=21:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 1]: Fact=10:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 2]: Fact=31:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 3]: Fact=20:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 4]: Fact= 9:	  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 5]: Fact=30:	  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 6]: Fact=19:	  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 7]: Fact= 8:	  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+--- 8x8, Mode=33 ---
+[ 0]: Fact=26:	  1,  2,  3,  4,  5,  6,  7,  8,  9,        *
+[ 1]: Fact=20:	  2,  3,  4,  5,  6,  7,  8,  9, 10,        *
+[ 2]: Fact=14:	  3,  4,  5,  6,  7,  8,  9, 10, 11,        *
+[ 3]: Fact= 8:	  4,  5,  6,  7,  8,  9, 10, 11, 12,        *
+[ 4]: Fact= 2:	  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 5]: Fact=28:	  5,  6,  7,  8,  9, 10, 11, 12, 13,        *
+[ 6]: Fact=22:	  6,  7,  8,  9, 10, 11, 12, 13, 14,        *
+[ 7]: Fact=16:	  7,  8,  9, 10, 11, 12, 13, 14, 15,        *
+--- 8x8, Mode=34 ---
+[ 0]: Fact= 0:	  2,  3,  4,  5,  6,  7,  8,  9,            *
+[ 1]: Fact= 0:	  3,  4,  5,  6,  7,  8,  9, 10,            *
+[ 2]: Fact= 0:	  4,  5,  6,  7,  8,  9, 10, 11,            *
+[ 3]: Fact= 0:	  5,  6,  7,  8,  9, 10, 11, 12,            *
+[ 4]: Fact= 0:	  6,  7,  8,  9, 10, 11, 12, 13,            *
+[ 5]: Fact= 0:	  7,  8,  9, 10, 11, 12, 13, 14,            *
+[ 6]: Fact= 0:	  8,  9, 10, 11, 12, 13, 14, 15,            *
+[ 7]: Fact= 0:	  9, 10, 11, 12, 13, 14, 15, 16,            *
+
--- a/x265/doc/reST/Makefile
+++ b/x265/doc/reST/Makefile
@ -0,0 +1,109 @@
+# Makefile for (Sphinx based) restructured text documentation
+#
+
+# You can set these variables from the command line.
+# SPHINXOPTS: extra options appended to every sphinx-build invocation.
+SPHINXOPTS    =
+# SPHINXBUILD: the command run by every builder target below.
+SPHINXBUILD   = sphinx-build
+# PAPER: empty by default; a4 or letter selects the matching PAPEROPT_* below.
+PAPER         =
+
+# Command run by the qhc target on the generated .qhcp project file.
+QCOLLECTIONGENERATOR = qcollectiongenerator
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+# Options shared by all builders. $(PAPEROPT_$(PAPER)) is a computed variable
+# name: it expands to nothing when PAPER is empty or names no PAPEROPT_*
+# variable. The assignment is recursive (=), so it is expanded at use time.
+# The trailing "." is passed to sphinx-build after the options.
+ALLSPHINXOPTS   = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+# Every target in this file names an action rather than a file it creates.
+# Declaring them all phony keeps a stray file or directory called e.g. "man",
+# "json" or "zip" from making the target look up to date.
+.PHONY: help clean distclean html zip web pickle json htmlhelp qthelp qhc latex man changes linkcheck
+
+# Print a one-line summary of each user-facing target defined in this file.
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html      to make standalone HTML files"
+	@echo "  zip       to make a zip archive of the HTML files"
+	@echo "  pickle    to make pickle files"
+	@echo "  web       to make pickle files (same as pickle)"
+	@echo "  json      to make JSON files"
+	@echo "  htmlhelp  to make HTML files and a HTML help project"
+	@echo "  qthelp    to make HTML files and a qthelp project"
+	@echo "  qhc       to make QHC file"
+	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  man       to make manpages"
+	@echo "  changes   to make an overview over all changed/added/deprecated items"
+	@echo "  linkcheck to check all external links for integrity"
+
+# Remove the contents of build/ but keep the directory itself.
+# The leading "-" tells make to carry on even if rm reports an error.
+clean:
+	-rm -rf build/*
+
+# Run clean, then remove the build/ directory. rmdir only succeeds on an
+# empty directory; the leading "-" makes a failure non-fatal.
+distclean: clean
+	-rmdir build/
+
+# Standalone HTML pages. The target name ($@) is used both as the Sphinx
+# builder name and as the output subdirectory of build/.
+html:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Build finished. The HTML pages are in build/html."
+
+# Archive the generated HTML tree as build/TortoiseHg.html.zip. Depends on
+# html so the pages exist first. "&&" ensures zip never runs from the wrong
+# directory when build/ cannot be entered.
+# NOTE(review): the archive name looks inherited from another project --
+# confirm before renaming.
+zip: html
+	cd build && zip -r TortoiseHg.html.zip html
+
+# Pickle output. The target name ($@) is used both as the Sphinx builder
+# name and as the output subdirectory of build/.
+pickle:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+# "web" has no recipe of its own; it only triggers the pickle target.
+web: pickle
+
+# JSON output. The target name ($@) is used both as the Sphinx builder name
+# and as the output subdirectory of build/.
+json:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+# HTML Help project. The target name ($@) is used both as the Sphinx builder
+# name and as the output subdirectory of build/.
+htmlhelp:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in build/htmlhelp."
+
+# Qt help project. The target name ($@) is used both as the Sphinx builder
+# name and as the output subdirectory of build/.
+# The inner double quotes around qcollectiongenerator in the first message
+# are consumed by the shell, so the word is printed without quote marks.
+qthelp:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in build/qthelp, like this:"
+	@echo "# qcollectiongenerator build/qthelp/foo.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile build/qthelp/foo.qhc"
+
+# Run $(QCOLLECTIONGENERATOR) on the .qhcp project file after the qthelp
+# target has run.
+# NOTE(review): the TortoiseHg.qhcp / TortoiseHg.qhc names look inherited from
+# another project -- confirm they match the file names the qthelp build
+# actually writes into build/qthelp.
+qhc: qthelp
+	$(QCOLLECTIONGENERATOR) build/qthelp/TortoiseHg.qhcp
+	@echo "Build finished. To view the help file:"
+	@echo "# assistant -collectionFile build/qthelp/TortoiseHg.qhc"
+
+# LaTeX sources. The target name ($@) is used both as the Sphinx builder
+# name and as the output subdirectory of build/. PAPER=a4 or PAPER=letter
+# takes effect through $(ALLSPHINXOPTS).
+latex:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Build finished; the LaTeX files are in build/latex."
+	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+	      "run these through (pdf)latex."
+
+# Manual pages. The target name ($@) is used both as the Sphinx builder name
+# and as the output subdirectory of build/.
+man:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Build finished; the manpages are in build/man."
+	@echo "Run \`man -l build/man/x265.1' or \`man -l build/man/libx265.3'" \
+	      "to view them."
+
+# Change overview. The target name ($@) is used both as the Sphinx builder
+# name and as the output subdirectory of build/.
+changes:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "The overview file is in build/changes."
+
+# External link check. The target name ($@) is used both as the Sphinx
+# builder name and as the output subdirectory of build/.
+linkcheck:
+	mkdir -p build/$@ build/doctrees
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in build/linkcheck/output.txt."
--- a/x265/doc/reST/api.rst
+++ b/x265/doc/reST/api.rst
@ -0,0 +1,511 @@
+*********************************
+Application Programming Interface
+*********************************
+
+Introduction
+============
+
+x265 is written primarily in C++ and x86 assembly language but the
+public facing programming interface is C for the widest possible
+portability.  This C interface is wholly defined within :file:`x265.h`
+in the source/ folder of our source tree.  All of the functions and
+variables and enumerations meant to be used by the end-user are present
+in this header.
+
+Where possible, x265 has tried to keep its public API as close as
+possible to x264's public API. So those familiar with using x264 through
+its C interface will find x265 quite familiar.
+
+This file is meant to be read in-order; the narrative follows linearly
+through the various sections.
+
+Build Considerations
+====================
+
+The choice of Main or Main10 profile encodes is made at compile time;
+the internal pixel depth influences a great deal of variable sizes and
+thus 8 and 10bit pixels are handled as different build options
+(primarily to maintain the performance of the 8bit builds). libx265
+exports a variable **x265_max_bit_depth** which indicates how the
+library was compiled (it will contain a value of 8 or 10). Further,
+**x265_version_str** is a pointer to a string indicating the version of
+x265 which was compiled, and **x265_build_info_str** is a pointer to a
+string identifying the compiler and build options.
+
+.. Note::
+
+	**x265_version_str** is only updated when **cmake** runs. If you are
+	making binaries for others to use, it is recommended to run
+	**cmake** prior to **make** in your build scripts.
+
+x265 will accept input pixels of any depth between 8 and 16 bits
+regardless of the depth of its internal pixels (8 or 10).  It will shift
+and mask input pixels as required to reach the internal depth. If
+downshifting is being performed using our CLI application (to 8 bits),
+the :option:`--dither` option may be enabled to reduce banding. This
+feature is not available through the C interface.
+
+Encoder
+=======
+
+The primary object in x265 is the encoder object, and this is
+represented in the public API as an opaque typedef **x265_encoder**.
+Pointers of this type are passed to most encoder functions.
+
+A single encoder generates a single output bitstream from a sequence of
+raw input pictures.  Thus if you need multiple output bitstreams you
+must allocate multiple encoders.  You may pass the same input pictures
+to multiple encoders, the encode function does not modify the input
+picture structures (the pictures are copied into the encoder as the
+first step of encode).
+
+Encoder allocation is a reentrant function, so multiple encoders may be
+safely allocated in a single process. The encoder access functions are
+not reentrant for a single encoder, so the recommended use case is to
+allocate one client thread per encoder instance (one thread for all
+encoder instances is possible, but some encoder access functions are
+blocking and thus this would be less efficient).
+
+.. Note::
+
+	There is one caveat to having multiple encoders within a single
+	process. All of the encoders must use the same maximum CTU size
+	because many global variables are configured based on this size.
+	Encoder allocation will fail if a mis-matched CTU size is attempted.
+	If no encoders are open, **x265_cleanup()** can be called to reset
+	the configured CTU size so a new size can be used.
+
+An encoder is allocated by calling **x265_encoder_open()**::
+
+	/* x265_encoder_open:
+	 *      create a new encoder handler, all parameters from x265_param are copied */
+	x265_encoder* x265_encoder_open(x265_param *);
+
+The returned pointer is then passed to all of the functions pertaining
+to this encode. A large amount of memory is allocated during this
+function call, but the encoder will continue to allocate memory as the
+first pictures are passed to the encoder; until its pool of picture
+structures is large enough to handle all of the pictures it must keep
+internally.  The pool size is determined by the lookahead depth, the
+number of frame threads, and the maximum number of references.
+
+As indicated in the comment, **x265_param** is copied internally so the user
+may release their copy after allocating the encoder.  Changes made to
+their copy of the param structure have no effect on the encoder after it
+has been allocated.
+
+Param
+=====
+
+The **x265_param** structure describes everything the encoder needs to
+know about the input pictures and the output bitstream and most
+everything in between.
+
+The recommended way to handle these param structures is to allocate them
+from libx265 via::
+
+	/* x265_param_alloc:
+	 *  Allocates an x265_param instance. The returned param structure is not
+	 *  special in any way, but using this method together with x265_param_free()
+	 *  and x265_param_parse() to set values by name allows the application to treat
+	 *  x265_param as an opaque data struct for version safety */
+	x265_param *x265_param_alloc();
+
+In this way, your application does not need to know the exact size of
+the param structure (the build of x265 could potentially be a bit newer
+than the copy of :file:`x265.h` that your application compiled against).
+
+Next you perform the initial *rough cut* configuration of the encoder by
+choosing a performance preset and optional tune factor.
+**x265_preset_names** and **x265_tune_names** respectively hold the
+string names of the presets and tune factors (see :ref:`presets
+<preset-tune-ref>` for more detail on presets and tune factors)::
+
+	/*      returns 0 on success, negative on failure (e.g. invalid preset/tune name). */
+	int x265_param_default_preset(x265_param *, const char *preset, const char *tune);
+
+Now you may optionally specify a profile. **x265_profile_names**
+contains the string names this function accepts::
+
+	/*      (can be NULL, in which case the function will do nothing)
+	 *      returns 0 on success, negative on failure (e.g. invalid profile name). */
+	int x265_param_apply_profile(x265_param *, const char *profile);
+
+Finally you configure any remaining options by name using repeated calls to::
+
+	/* x265_param_parse:
+	 *  set one parameter by name.
+	 *  returns 0 on success, or returns one of the following errors.
+	 *  note: BAD_VALUE occurs only if it can't even parse the value,
+	 *  numerical range is not checked until x265_encoder_open().
+	 *  value=NULL means "true" for boolean options, but is a BAD_VALUE for non-booleans. */
+	#define X265_PARAM_BAD_NAME  (-1)
+	#define X265_PARAM_BAD_VALUE (-2)
+	int x265_param_parse(x265_param *p, const char *name, const char *value);
+
+See :ref:`string options <string-options-ref>` for the list of options (and their
+descriptions) which can be set by **x265_param_parse()**.
+
+After the encoder has been created, you may release the param structure::
+
+	/* x265_param_free:
+	 *  Use x265_param_free() to release storage for an x265_param instance
+	 *  allocated by x265_param_alloc() */
+	void x265_param_free(x265_param *);
+
+.. Note::
+
+	Using these methods to allocate and release the param structures
+	helps future-proof your code in many ways, but the x265 API is
+	versioned in such a way that we prevent linkage against a build of
+	x265 that does not match the version of the header you are compiling
+	against (unless you use x265_api_query() to acquire the library's
+	interfaces). This is a function of the X265_BUILD macro.
+
+**x265_encoder_parameters()** may be used to get a copy of the param
+structure from the encoder after it has been opened, in order to see the
+changes made to the parameters for auto-detection and other reasons::
+
+	/* x265_encoder_parameters:
+	 *      copies the current internal set of parameters to the pointer provided
+	 *      by the caller.  useful when the calling application needs to know
+	 *      how x265_encoder_open has changed the parameters.
+	 *      note that the data accessible through pointers in the returned param struct
+	 *      (e.g. filenames) should not be modified by the calling application. */
+	void x265_encoder_parameters(x265_encoder *, x265_param *);
+
+**x265_encoder_reconfig()** may be used to reconfigure encoder parameters mid-encode::
+
+	/* x265_encoder_reconfig:
+	 *       used to modify encoder parameters.
+	 *      various parameters from x265_param are copied.
+	 *      this takes effect immediately, on whichever frame is encoded next;
+	 *      returns 0 on success, negative on parameter validation error.
+	 *
+	 *      not all parameters can be changed; see the actual function for a
+	 *      detailed breakdown.  since not all parameters can be changed, moving
+	 *      from preset to preset may not always fully copy all relevant parameters,
+	 *      but should still work usably in practice. however, more so than for
+	 *      other presets, many of the speed shortcuts used in ultrafast cannot be
+	 *      switched out of; using reconfig to switch between ultrafast and other
+	 *      presets is not recommended without a more fine-grained breakdown of
+	 *      parameters to take this into account. */
+	int x265_encoder_reconfig(x265_encoder *, x265_param *);
+
+Pictures
+========
+
+Raw pictures are passed to the encoder via the **x265_picture** structure.
+Just like the param structure we recommend you allocate this structure
+from the encoder to avoid potential size mismatches::
+
+	/* x265_picture_alloc:
+	 *  Allocates an x265_picture instance. The returned picture structure is not
+	 *  special in any way, but using this method together with x265_picture_free()
+	 *  and x265_picture_init() allows some version safety. New picture fields will
+	 *  always be added to the end of x265_picture */
+	x265_picture *x265_picture_alloc();
+
+Regardless of whether you allocate your picture structure this way or
+whether you simply declare it on the stack, your next step is to
+initialize the structure via::
+
+	/***
+	 * Initialize an x265_picture structure to default values. It sets the pixel
+	 * depth and color space to the encoder's internal values and sets the slice
+	 * type to auto - so the lookahead will determine slice type.
+	 */
+	void x265_picture_init(x265_param *param, x265_picture *pic);
+
+x265 does not perform any color space conversions, so the raw picture's
+color space (chroma sampling) must match the color space specified in
+the param structure used to allocate the encoder. **x265_picture_init**
+initializes this field to the internal color space and it is best to
+leave it unmodified.
+
+The picture bit depth is initialized to be the encoder's internal bit
+depth but this value should be changed to the actual depth of the pixels
+being passed into the encoder.  If the picture bit depth is more than 8,
+the encoder assumes two bytes are used to represent each sample
+(little-endian shorts).
+
+The user is responsible for setting the plane pointers and plane strides
+(in units of bytes, not pixels). The presentation time stamp (**pts**)
+is optional, depending on whether you need accurate decode time stamps
+(**dts**) on output.
+
+If you wish to override the lookahead or rate control for a given
+picture you may specify a slicetype other than X265_TYPE_AUTO, or a
+forceQP value other than 0.
+
+x265 does not modify the picture structure provided as input, so you may
+reuse a single **x265_picture** for all pictures passed to a single
+encoder, or even all pictures passed to multiple encoders.
+
+Structures allocated from the library should eventually be released::
+
+	/* x265_picture_free:
+	 *  Use x265_picture_free() to release storage for an x265_picture instance
+	 *  allocated by x265_picture_alloc() */
+	void x265_picture_free(x265_picture *);
+
+
+Analysis Buffers
+================
+
+Analysis information can be saved and reused between encodes of the
+same video sequence (generally for multiple bitrate encodes).  The best
+results are attained by saving the analysis information of the highest
+bitrate encode and reusing it in lower bitrate encodes.
+
+When saving or loading analysis data, buffers must be allocated for
+every picture passed into the encoder using::
+
+	/* x265_alloc_analysis_data:
+	 *  Allocate memory to hold analysis meta data, returns 1 on success else 0 */
+	int x265_alloc_analysis_data(x265_picture*);
+
+Note that this is very different from the typical semantics of
+**x265_picture**, which can be reused many times. The analysis buffers must
+be re-allocated for every input picture.
+
+Analysis buffers passed to the encoder are owned by the encoder until
+it passes the buffers back via an output **x265_picture**. The user is
+responsible for releasing the buffers when they are finished with them
+via::
+
+	/* x265_free_analysis_data:
+	 *  Use x265_free_analysis_data to release storage of members allocated by
+	 *  x265_alloc_analysis_data */
+	void x265_free_analysis_data(x265_picture*);
+
+
+Encode Process
+==============
+
+The output of the encoder is a series of NAL packets, which are always
+returned concatenated in consecutive memory. HEVC streams have SPS and
+PPS and VPS headers which describe how the following packets are to be
+decoded. If you specified :option:`--repeat-headers` then those headers
+will be output with every keyframe.  Otherwise you must explicitly query
+those headers using::
+
+	/* x265_encoder_headers:
+	 *      return the SPS and PPS that will be used for the whole stream.
+	 *      *pi_nal is the number of NAL units outputted in pp_nal.
+	 *      returns negative on error, total byte size of payload data on success
+	 *      the payloads of all output NALs are guaranteed to be sequential in memory. */
+	int x265_encoder_headers(x265_encoder *, x265_nal **pp_nal, uint32_t *pi_nal);
+
+Now we get to the main encode loop. Raw input pictures are passed to the
+encoder in display order via::
+
+	/* x265_encoder_encode:
+	 *      encode one picture.
+	 *      *pi_nal is the number of NAL units outputted in pp_nal.
+	 *      returns negative on error, zero if no NAL units returned.
+	 *      the payloads of all output NALs are guaranteed to be sequential in memory. */
+	int x265_encoder_encode(x265_encoder *encoder, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out);
+
+These pictures are queued up until the lookahead is full, and then the
+frame encoders in turn are filled, and then finally you begin receiving
+output NALs (corresponding to a single output picture) with each input
+picture you pass into the encoder.
+
+Once the pipeline is completely full, **x265_encoder_encode()** will
+block until the next output picture is complete.
+
+.. note:: 
+
+	Optionally, if the pointer of a second **x265_picture** structure is
+	provided, the encoder will fill it with data pertaining to the
+	output picture corresponding to the output NALs, including the
+	reconstructed image, POC and decode timestamp. These pictures will be
+	in encode (or decode) order.
+
+When the last of the raw input pictures has been sent to the encoder,
+**x265_encoder_encode()** must still be called repeatedly with a
+*pic_in* argument of 0, indicating a pipeline flush, until the function
+returns a value less than or equal to 0 (indicating the output bitstream
+is complete).
+
+At any time during this process, the application may query running
+statistics from the encoder::
+
+	/* x265_encoder_get_stats:
+	 *       returns encoder statistics */
+	void x265_encoder_get_stats(x265_encoder *encoder, x265_stats *, uint32_t statsSizeBytes);
+
+Cleanup
+=======
+
+Finally, the encoder must be closed in order to free all of its
+resources. An encoder that has been flushed cannot be restarted and
+reused. Once **x265_encoder_close()** has been called, the encoder
+handle must be discarded::
+
+	/* x265_encoder_close:
+	 *      close an encoder handler */
+	void x265_encoder_close(x265_encoder *);
+
+When the application has completed all encodes, it should call
+**x265_cleanup()** to free process-global allocations, particularly if a memory-leak
+detection tool is being used. **x265_cleanup()** also resets the saved
+CTU size so it will be possible to create a new encoder with a different
+CTU size::
+
+	/* x265_cleanup:
+	 *     release library static allocations, reset configured CTU size */
+	void x265_cleanup(void);
+
+
+Multi-library Interface
+=======================
+
+If your application might want to make a runtime bit-depth selection, it
+will need to use one of these bit-depth introspection interfaces which
+returns an API structure containing the public function entry points and
+constants.
+
+Instead of directly using all of the **x265_** methods documented above,
+you query an x265_api structure from your libx265 and then use the
+function pointers of the same name (minus the **x265_** prefix) within
+that structure.  For instance **x265_param_default()** becomes
+**api->param_default()**.
+
+x265_api_get
+------------
+
+The first bit-depth introspection method is x265_api_get(). It is designed
+for applications that might statically link with libx265, or will at
+least be tied to a particular SONAME or API version::
+
+	/* x265_api_get:
+	 *   Retrieve the programming interface for a linked x265 library.
+	 *   May return NULL if no library is available that supports the
+	 *   requested bit depth. If bitDepth is 0, the function is guaranteed
+	 *   to return a non-NULL x265_api pointer from the system default
+	 *   libx265 */
+	const x265_api* x265_api_get(int bitDepth);
+
+Like **x265_encoder_encode()**, this function has the build number
+automatically appended to the function name via macros. This ties your
+application to a particular binary API version of libx265 (the one you
+compile against). If you attempt to link with a libx265 with a different
+API version number, the link will fail.
+
+Obviously this has no meaningful effect on applications which statically
+link to libx265.
+
+x265_api_query
+--------------
+
+The second bit-depth introspection method is designed for applications
+which need more flexibility in API versioning.  If you use
+**x265_api_query()** and dynamically link to libx265 at runtime (using
+dlopen() on POSIX or LoadLibrary() on Windows) your application is no
+longer directly tied to the API version that it was compiled against::
+
+	/* x265_api_query:
+	 *   Retrieve the programming interface for a linked x265 library, like
+	 *   x265_api_get(), except this function accepts X265_BUILD as the second
+	 *   argument rather than using the build number as part of the function name.
+	 *   Applications which dynamically link to libx265 can use this interface to
+	 *   query the library API and achieve a relative amount of version skew
+	 *   flexibility. The function may return NULL if the library determines that
+	 *   the apiVersion that your application was compiled against is not compatible
+	 *   with the library you have linked with.
+	 *
+	 *   api_major_version will be incremented any time non-backward compatible
+	 *   changes are made to any public structures or functions. If
+	 *   api_major_version does not match X265_MAJOR_VERSION from the x265.h your
+	 *   application compiled against, your application must not use the returned
+	 *   x265_api pointer.
+	 *
+	 *   Users of this API *must* also validate the sizes of any structures which
+	 *   are not treated as opaque in application code. For instance, if your
+	 *   application dereferences a x265_param pointer, then it must check that
+	 *   api->sizeof_param matches the sizeof(x265_param) that your application
+	 *   compiled with. */
+	const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
+
+A number of validations must be performed on the returned API structure
+in order to determine if it is safe for use by your application. If you
+do not perform these checks, your application is liable to crash::
+
+	if (api->api_major_version != X265_MAJOR_VERSION) /* do not use */
+	if (api->sizeof_param != sizeof(x265_param))      /* do not use */
+	if (api->sizeof_picture != sizeof(x265_picture))  /* do not use */
+	if (api->sizeof_stats != sizeof(x265_stats))      /* do not use */
+	if (api->sizeof_zone != sizeof(x265_zone))        /* do not use */
+	etc.
+
+Note that if your application does not directly allocate or dereference
+one of these structures, if it treats the structure as opaque or does
+not use it at all, then it can skip the size check for that structure.
+
+In particular, if your application uses api->param_alloc(),
+api->param_free(), api->param_parse(), etc and never directly accesses
+any x265_param fields, then it can skip the check on the
+sizeof(x265_param) and thereby ignore changes to that structure (which
+account for a large percentage of X265_BUILD bumps).
+
+Build Implications
+------------------
+
+By default libx265 will place all of its internal C++ classes and
+functions within an x265 namespace and export all of the C functions
+documented in this file. Obviously this prevents 8bit and 10bit builds
+of libx265 from being statically linked into a single binary, since all
+of those symbols would collide.
+
+However, if you set the EXPORT_C_API cmake option to OFF then libx265
+will use a bit-depth specific namespace and prefix for its assembly
+functions (x265_8bit, x265_10bit or x265_12bit) and export no C
+functions.
+
+In this way you can build one or more libx265 libraries without any
+exported C interface and link them into a libx265 build that does export
+a C interface. The build which exported the C functions becomes the
+*default* bit depth for the combined library, and the other bit depths
+are available via the bit-depth introspection methods.
+
+.. Note::
+
+	When setting EXPORT_C_API cmake option to OFF, it is recommended to
+	also set ENABLE_SHARED and ENABLE_CLI to OFF to prevent build
+	problems.  We only need the static library from these builds.
+
+If an application requests a bit-depth that is not supported by the
+default library or any of the additionally linked libraries, the
+introspection method will fall-back to an attempt to dynamically bind a
+shared library with a name appropriate for the requested bit-depth::
+
+	8-bit:  libx265_main
+	10-bit: libx265_main10
+	12-bit: libx265_main12
+
+If the profile-named library is not found, it will then try to bind a
+generic libx265 in the hopes that it is a multilib library with all bit
+depths.
+
+Packaging and Distribution
+--------------------------
+
+We recommend that packagers distribute a single combined shared/static
+library build which includes all the bit depth libraries linked
+together. See the multilib scripts in our :file:`build/` subdirectories
+for examples of how to effect these combined library builds. It is the
+packager's discretion which bit-depth exports the public C functions and
+thus becomes the default bit-depth for the combined library.
+
+.. Note::
+
+	Windows packagers might want to build libx265 with WINXP_SUPPORT
+	enabled. This makes the resulting binaries functional on XP and
+	Vista. Without this flag, the minimum supported host O/S is Windows
+	7. Also note that binaries built with WINXP_SUPPORT will *not* have
+	NUMA support and they will have slightly less performance.
+
+	STATIC_LINK_CRT is also recommended so end-users will not need to
+	install any additional MSVC C runtime libraries.
--- a/x265/doc/reST/cli.rst
+++ b/x265/doc/reST/cli.rst
--- a/x265/doc/reST/conf.py
+++ b/x265/doc/reST/conf.py
@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#
+# -- General configuration -----------------------------------------------------
+
+source_suffix = '.rst'
+
+# Name of the master file 
+master_doc = 'index'
+
+# General information about the project.
+project = u'x265'
+
+# This is the Copyright Information that will appear on the bottom of the document
+copyright = u'2014 MulticoreWare Inc'
+
+# -- Options for HTML output ---------------------------------------------------
+html_theme = "default"
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    ('index', 'libx265', 'Full x265 Documentation',
+    ['MulticoreWare Inc'], 3),
+    ('x265', 'x265', 'x265 CLI Documentation',
+    ['MulticoreWare Inc'], 1)
+]
--- a/x265/doc/reST/index.rst
+++ b/x265/doc/reST/index.rst
@ -0,0 +1,11 @@
+x265 Documentation
+======================
+.. toctree::
+   :maxdepth: 2
+
+   introduction
+   cli
+   api
+   threading
+   presets
+   lossless
--- a/x265/doc/reST/introduction.rst
+++ b/x265/doc/reST/introduction.rst
@ -0,0 +1,82 @@
+************
+Introduction
+************
+
+Increasing demand for high definition and ultra-high definition video,
+along with an increasing desire for video on demand has led to
+exponential growth in demand for bandwidth and storage requirements.
+These challenges can be met by the new High Efficiency Video Coding
+(HEVC) standard, also known as H.265. The x265 HEVC encoder project was
+launched by MulticoreWare in 2013, aiming to provide the most efficient,
+highest performance HEVC video encoder.
+
+About HEVC
+==========
+
+The High Efficiency Video Coding (HEVC) standard was developed by the ISO/IEC
+Moving Picture Experts Group (MPEG) and ITU-T Video Coding Experts Group
+(VCEG), through their Joint Collaborative Team on Video Coding (JCT-VC).
+HEVC is also known as ISO/IEC 23008-2 MPEG-H Part 2 and ITU-T H.265.
+HEVC provides superior video quality and up to twice the data
+compression as the previous standard (H.264/MPEG-4 AVC).  HEVC can
+support 8K Ultra High Definition video, with a picture size up to
+8192x4320 pixels.
+
+About x265
+==========
+
+The primary objective of x265 is to become the best H.265/HEVC encoder
+available anywhere, offering the highest compression efficiency and the
+highest performance on a wide variety of hardware platforms. The x265
+encoder is available as an open source library, published under the
+GPLv2 license. It is also available under a commercial license, enabling
+commercial companies to utilize and distribute x265 in their solutions
+without being subject to the restrictions of the GPL license.
+
+x265 is developed by `MulticoreWare <http://www.multicorewareinc.com>`_,
+leaders in high performance software solutions, with backing from
+leading video technology providers including `Telestream
+<http://www.telestream.com>`_ and `Doremi Labs
+<http://www.doremilabs.com>`_ (and other companies who want to remain
+anonymous at this time), and with contributions from open source
+developers.  x265 leverages many of the outstanding video encoding
+features and optimizations from the x264 AVC encoder project.
+
+The x265 software is available for free under the GNU GPL 2 license,
+from https://bitbucket.org/multicoreware/x265.  For commercial companies
+that wish to distribute x265 without being subject to the open source
+requirements of the GPL 2 license, commercial licenses are available
+with competitive terms.  Contact license @ x265.com to inquire about
+commercial license terms.  
+
+While x265 is primarily designed as a video encoder software library, a
+command-line executable is provided to facilitate testing and
+development.  We expect x265 to be utilized in many leading video
+hardware and software products and services in the coming months.
+
+LEGAL NOTICES
+=============
+
+The x265 software is owned and copyrighted by MulticoreWare, Inc.
+MulticoreWare is committed to offering the x265 software under the GNU
+GPL v2 license.  Companies who do not wish to integrate the x265
+Software in their products under the terms of the GPL license can
+contact MulticoreWare (license @ x265.com) to obtain a commercial
+license agreement.  Companies who use x265 under the GPL may also wish
+to work with MulticoreWare to accelerate the development of specific
+features or optimized support for specific hardware or software
+platforms, or to contract for support.
+
+The GNU GPL v2 license or the x265 commercial license agreement govern
+your rights to access the copyrighted x265 software source code, but do
+not cover any patents that may be applicable to the function of binary
+executable software created from the x265 source code.  You are
+responsible for understanding the laws in your country, and for
+licensing all applicable patent rights needed for use or distribution of
+software applications created from the x265 source code.  A good place
+to start is with the `Motion Picture Experts Group - Licensing Authority
+- HEVC Licensing Program <http://www.mpegla.com/main/PID/HEVC/default.aspx>`_.
+
+x265 is a registered trademark of MulticoreWare, Inc.  The x265 logo is
+a trademark of MulticoreWare, and may only be used with explicit written
+permission.  All rights reserved.
--- a/x265/doc/reST/lossless.rst
+++ b/x265/doc/reST/lossless.rst
@ -0,0 +1,162 @@
+Lossless
+--------
+
+Lossless Encoding
+=================
+
+x265 can encode HEVC bitstreams that are entirely lossless (the
+reconstructed images are bit-exact to the source images) by using the
+:option:`--lossless` option.  Lossless operation is theoretically
+simple. Rate control, by definition, is disabled and the encoder
+disables all quality metrics since they would only waste CPU cycles.
+Instead, x265 reports only a compression factor at the end of the
+encode.
+
+In HEVC, lossless coding means bypassing both the DCT transforms and
+quantization (often referred to as transquant bypass).  Normal
+predictions are still allowed, so the encoder will find optimal inter or
+intra predictions and then losslessly code the residual (with transquant
+bypass).
+
+All :option:`--preset` options are capable of generating lossless video
+streams, but in general the slower the preset the better the compression
+ratio (and the slower the encode). Here are some examples::
+
+	./x265 ../test-720p.y4m o.bin --preset ultrafast --lossless
+	... <snip> ...
+	encoded 721 frames in 238.38s (3.02 fps), 57457.94 kb/s
+
+	./x265 ../test-720p.y4m o.bin --preset faster --lossless
+	... <snip> ...
+	x265 [info]: lossless compression ratio 3.11::1
+	encoded 721 frames in 258.46s (2.79 fps), 56787.65 kb/s
+
+	./x265 ../test-720p.y4m o.bin --preset slow --lossless
+	... <snip> ...
+	x265 [info]: lossless compression ratio 3.36::1
+	encoded 721 frames in 576.73s (1.25 fps), 52668.25 kb/s
+
+	./x265 ../test-720p.y4m o.bin --preset veryslow --lossless
+	x265 [info]: lossless compression ratio 3.76::1
+	encoded 721 frames in 6298.22s (0.11 fps), 47008.65 kb/s
+ 
+.. Note::
+	In HEVC, only QP=4 is truly lossless quantization, and thus when
+	encoding losslessly x265 uses QP=4 internally in its RDO decisions.
+
+Near-lossless Encoding
+======================
+
+Near-lossless conditions are quite a bit more interesting.  Normal ABR
+rate control will allow one to scale the bitrate up to the point where 
+quantization is entirely bypassed (QP <= 4), but even at this point
+there is a lot of SSIM left on the table because of the DCT transforms,
+which are not lossless::
+
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 40000 --ssim
+	encoded 721 frames in 326.62s (2.21 fps), 39750.56 kb/s, SSIM Mean Y: 0.9990703 (30.317 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 50000 --ssim
+	encoded 721 frames in 349.27s (2.06 fps), 44326.84 kb/s, SSIM Mean Y: 0.9994134 (32.316 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 60000 --ssim
+	encoded 721 frames in 360.04s (2.00 fps), 45394.50 kb/s, SSIM Mean Y: 0.9994823 (32.859 dB)
+
+For the encoder to get over this quality plateau, one must enable
+lossless coding at the CU level with :option:`--cu-lossless`.  It tells
+the encoder to evaluate trans-quant bypass as a coding option for each
+CU, and to pick the option with the best rate-distortion
+characteristics.
+
+The :option:`--cu-lossless` option is very expensive, computationally,
+and it only has a positive effect when the QP is extremely low, allowing
+RDO to spend a large amount of bits to make small improvements to
+quality.  So this option should only be enabled when you are encoding
+near-lossless bitstreams::
+
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 40000 --ssim --cu-lossless
+	encoded 721 frames in 500.51s (1.44 fps), 40017.10 kb/s, SSIM Mean Y: 0.9997790 (36.557 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 50000 --ssim --cu-lossless
+	encoded 721 frames in 524.60s (1.37 fps), 46083.37 kb/s, SSIM Mean Y: 0.9999432 (42.456 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 60000 --ssim --cu-lossless
+	encoded 721 frames in 523.63s (1.38 fps), 46552.92 kb/s, SSIM Mean Y: 0.9999489 (42.917 dB)
+
+.. Note::
+	It is not unusual for bitrate to drop as you increase lossless coding.
+	Having "perfectly coded" reference blocks reduces residual in later
+	frames. It is quite possible for a near-lossless encode to spend
+	more bits than a lossless encode.
+
+Enabling psycho-visual rate distortion will improve lossless coding.
+:option:`--psy-rd` influences the RDO decisions in favor of energy
+(detail) preservation over bit cost and results in more blocks being
+losslessly coded.  Our psy-rd feature is not yet assembly optimized, so
+this makes the encodes run even slower::
+
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 40000 --ssim --cu-lossless --psy-rd 1.0
+	encoded 721 frames in 581.83s (1.24 fps), 40112.15 kb/s, SSIM Mean Y: 0.9998632 (38.638 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 50000 --ssim --cu-lossless --psy-rd 1.0
+	encoded 721 frames in 587.54s (1.23 fps), 46284.55 kb/s, SSIM Mean Y: 0.9999663 (44.721 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset medium --bitrate 60000 --ssim --cu-lossless --psy-rd 1.0
+	encoded 721 frames in 592.93s (1.22 fps), 46839.51 kb/s, SSIM Mean Y: 0.9999707 (45.334 dB)
+
+:option:`--cu-lossless` will also be more effective at slower
+presets which perform RDO at more levels and thus may find smaller
+blocks that would benefit from lossless coding::
+
+	./x265 ../test-720p.y4m o.bin --preset veryslow --bitrate 40000 --ssim --cu-lossless
+	encoded 721 frames in 12969.25s (0.06 fps), 37331.96 kb/s, SSIM Mean Y: 0.9998108 (37.231 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset veryslow --bitrate 50000 --ssim --cu-lossless
+	encoded 721 frames in 46217.84s (0.05 fps), 42976.28 kb/s, SSIM Mean Y: 0.9999482 (42.856 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset veryslow --bitrate 60000 --ssim --cu-lossless
+	encoded 721 frames in 13738.17s (0.05 fps), 43864.21 kb/s, SSIM Mean Y: 0.9999633 (44.348 dB)
+	
+And with psy-rd and a slow preset together, very high SSIMs are
+possible::
+
+	./x265 ../test-720p.y4m o.bin --preset veryslow --bitrate 40000 --ssim --cu-lossless --psy-rd 1.0
+	encoded 721 frames in 11675.81s (0.06 fps), 37819.45 kb/s, SSIM Mean Y: 0.9999181 (40.867 dB)
+	    
+	./x265 ../test-720p.y4m o.bin --preset veryslow --bitrate 50000 --ssim --cu-lossless --psy-rd 1.0
+	encoded 721 frames in 12414.56s (0.06 fps), 42815.75 kb/s, SSIM Mean Y: 0.9999758 (46.168 dB)
+	
+	./x265 ../test-720p.y4m o.bin --preset veryslow --bitrate 60000 --ssim --cu-lossless --psy-rd 1.0
+	encoded 721 frames in 11684.89s (0.06 fps), 43324.48 kb/s, SSIM Mean Y: 0.9999793 (46.835 dB)
+
+
+It's important to note in the end that it is easier (less work) for the
+encoder to encode the video losslessly than it is to encode it
+near-losslessly. If the encoder knows up front the encode must be
+lossless, it does not need to evaluate any lossy coding methods. The
+encoder only needs to find the most efficient prediction for each block
+and then entropy code the residual.
+
+It is not feasible for :option:`--cu-lossless` to turn itself on when
+the encoder determines it is encoding a near-lossless bitstream (ie:
+when rate control nearly disables all quantization) because the feature
+requires a flag to be enabled in the stream headers. At the time the
+stream headers are being coded we do not know whether
+:option:`--cu-lossless` would be a help or a hindrance.  If very few or no
+blocks end up being coded as lossless, then having the feature enabled
+is a net loss in compression efficiency because it adds a flag that must
+be coded for every CU. So ignoring even the performance aspects of the
+feature, it can be a compression loss if enabled without being used. So
+it is up to the user to only enable this feature when they are coding at
+near-lossless quality.
+
+Transform Skip
+==============
+
+A somewhat related feature, :option:`--tskip` tells the encoder to
+evaluate transform-skip (bypass DCT but with quantization still enabled)
+when coding small 4x4 transform blocks. This feature is intended to
+improve the coding efficiency of screen content (aka: text on a screen)
+and is not really intended for lossless coding.  This feature should
+only be enabled if the content has a lot of very sharp edges in it, and
+is mostly unrelated to lossless coding.
--- a/x265/doc/reST/presets.rst
+++ b/x265/doc/reST/presets.rst
@ -0,0 +1,182 @@
+Preset Options
+--------------
+
+.. _presets:
+
+Presets
+=======
+
+x265 has a number of predefined :option:`--preset` options that make
+trade-offs between encode speed (encoded frames per second) and
+compression efficiency (quality per bit in the bitstream).  The default
+preset is medium; it does a reasonably good job of finding the best
+possible quality without spending enormous CPU cycles looking for the
+absolute most efficient way to achieve that quality.  As you go higher
+than medium, the encoder takes shortcuts to improve performance at the
+expense of quality and compression efficiency.  As you go lower than
+medium, the encoder tries harder and harder to achieve the best quality
+per bit compression ratio.
+
+The presets adjust encoder parameters to affect these trade-offs.
+
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+|              | ultrafast | superfast | veryfast | faster | fast | medium | slow | slower | veryslow | placebo |
++==============+===========+===========+==========+========+======+========+======+========+==========+=========+
+| ctu          |   32      |    32     |   32     |  64    |  64  |   64   |  64  |  64    |   64     |   64    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| min-cu-size  |   16      |     8     |    8     |   8    |   8  |    8   |   8  |   8    |    8     |    8    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| bframes      |    3      |     3     |    4     |   4    |  4   |    4   |  4   |   8    |    8     |    8    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| b-adapt      |    0      |     0     |    0     |   0    |  0   |    2   |  2   |   2    |    2     |    2    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| rc-lookahead |    5      |    10     |   15     |  15    |  15  |   20   |  25  |   30   |   40     |   60    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| scenecut     |    0      |    40     |   40     |  40    |  40  |   40   |  40  |   40   |   40     |   40    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| refs         |    1      |     1     |    1     |   1    |  2   |    3   |  3   |   3    |    5     |    5    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| me           |   dia     |   hex     |   hex    |  hex   | hex  |   hex  | star |  star  |   star   |   star  |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| merange      |   57      |    57     |   57     |  57    |  57  |   57   | 57   |  57    |   57     |   92    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| subme        |    0      |     1     |    1     |   2    |  2   |    2   |  3   |   3    |    4     |    5    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| rect         |    0      |     0     |    0     |   0    |  0   |    0   |  1   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| amp          |    0      |     0     |    0     |   0    |  0   |    0   |  0   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| max-merge    |    2      |     2     |    2     |   2    |  2   |    2   |  3   |   3    |    4     |    5    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| early-skip   |    1      |     1     |    1     |   1    |  0   |    0   |  0   |   0    |    0     |    0    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| fast-intra   |    1      |     1     |    1     |   1    |  1   |    0   |  0   |   0    |    0     |    0    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| b-intra      |    0      |     0     |    0     |   0    |  0   |    0   |  0   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| sao          |    0      |     0     |    1     |   1    |  1   |    1   |  1   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| signhide     |    0      |     1     |    1     |   1    |  1   |    1   |  1   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| weightp      |    0      |     0     |    1     |   1    |  1   |    1   |  1   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| weightb      |    0      |     0     |    0     |   0    |  0   |    0   |  0   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| aq-mode      |    0      |     0     |    1     |   1    |  1   |    1   |  1   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| cuTree       |    0      |     0     |    0     |   0    |  1   |    1   |  1   |   1    |    1     |    1    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| rdLevel      |    2      |     2     |    2     |   2    |  2   |    3   |  4   |   6    |    6     |    6    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| rdoq-level   |    0      |     0     |    0     |   0    |  0   |    0   |  2   |   2    |    2     |    2    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| tu-intra     |    1      |     1     |    1     |   1    |  1   |    1   |  1   |   2    |    3     |    4    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+| tu-inter     |    1      |     1     |    1     |   1    |  1   |    1   |  1   |   2    |    3     |    4    |
++--------------+-----------+-----------+----------+--------+------+--------+------+--------+----------+---------+
+
+Placebo mode enables transform-skip prediction evaluation.
+
+.. _tunings:
+
+Tuning
+======
+
+There are a few :option:`--tune` options available, which are applied
+after the preset.
+
+.. Note::
+
+	The *psnr* and *ssim* tune options disable all optimizations that
+	sacrifice metric scores for perceived visual quality (also known as
+	psycho-visual optimizations). By default x265 always tunes for
+	highest perceived visual quality but if one intends to measure an
+	encode using PSNR or SSIM for the purpose of benchmarking, we highly
+	recommend you configure x265 to tune for that particular metric.
+
++--------------+-----------------------------------------------------+
+| --tune       | effect                                              |
++==============+=====================================================+
+| psnr         | disables adaptive quant, psy-rd, and cutree         |
++--------------+-----------------------------------------------------+
+| ssim         | enables adaptive quant auto-mode, disables psy-rd   |
++--------------+-----------------------------------------------------+
+| grain        | improves retention of film grain. more below        |
++--------------+-----------------------------------------------------+
+| fastdecode   | no loop filters, no weighted pred, no intra in B    |
++--------------+-----------------------------------------------------+
+| zerolatency  | no lookahead, no B frames, no cutree                |
++--------------+-----------------------------------------------------+
+
+
+
+Film Grain Retention
+~~~~~~~~~~~~~~~~~~~~
+
+:option:`--tune` *grain* tries to improve the retention of film grain in
+the reconstructed output. It disables rate distortion optimizations in
+quantization, and increases the default psy-rd.
+
+    * :option:`--psy-rd` 0.5
+    * :option:`--rdoq-level` 0
+    * :option:`--psy-rdoq` 0
+
+It lowers the strength of adaptive quantization, so residual energy can
+be more evenly distributed across the (noisy) picture:
+
+    * :option:`--aq-strength` 0.3
+
+And it similarly tunes rate control to prevent the slice QP from
+swinging too wildly from frame to frame:
+
+    * :option:`--ipratio` 1.1
+    * :option:`--pbratio` 1.1
+    * :option:`--qcomp` 0.8
+
+And lastly it reduces the strength of deblocking to prevent grain being
+blurred on block boundaries:
+
+    * :option:`--deblock` -2
+
+Fast Decode
+~~~~~~~~~~~
+
+:option:`--tune` *fastdecode* disables encoder features which tend to be
+bottlenecks for the decoder. It is intended for use with 4K content at
+high bitrates which can cause decoders to struggle. It disables both
+HEVC loop filters, which tend to be process bottlenecks:
+
+    * :option:`--no-deblock`
+    * :option:`--no-sao`
+
+It disables weighted prediction, which tends to be a bandwidth bottleneck:
+
+    * :option:`--no-weightp`
+    * :option:`--no-weightb`
+
+And it disables intra blocks in B frames with :option:`--no-b-intra`
+since intra predicted blocks cause serial dependencies in the decoder.
+
+Zero Latency
+~~~~~~~~~~~~
+
+There are two halves to the latency problem. There is latency at the
+decoder and latency at the encoder. :option:`--tune` *zerolatency*
+removes latency from both sides. The decoder latency is removed by:
+
+    * :option:`--bframes` 0
+
+Encoder latency is removed by:
+
+    * :option:`--b-adapt` 0
+    * :option:`--rc-lookahead` 0
+    * :option:`--no-scenecut`
+    * :option:`--no-cutree`
+    * :option:`--frame-threads` 1
+
+With all of these settings x265_encoder_encode() will run synchronously;
+the picture passed as pic_in will be encoded and returned as NALs. These
+settings disable frame parallelism, which is an important component for
+x265 performance. If you can tolerate any latency on the encoder, you
+can increase performance by increasing the number of frame threads. Each
+additional frame thread adds one frame of latency.
--- a/x265/doc/reST/threading.rst
+++ b/x265/doc/reST/threading.rst
@ -0,0 +1,266 @@
+*********
+Threading
+*********
+
+.. _pools:
+
+Thread Pools
+============
+
+x265 creates one or more thread pools per encoder, one pool per NUMA
+node (typically a CPU socket). :option:`--pools` specifies the number of
+pools and the number of threads per pool the encoder will allocate. By
+default x265 allocates one thread per (hyperthreaded) CPU core on each
+NUMA node.
+
+If you are running multiple encoders on a system with multiple NUMA
+nodes, it is recommended to isolate each of them to a single node in
+order to avoid the NUMA overhead of remote memory access.
+
+Work distribution is job based. Idle worker threads scan the job
+providers assigned to their thread pool for jobs to perform. When no
+jobs are available, the idle worker threads block and consume no CPU
+cycles.
+
+Objects which desire to distribute work to worker threads are known as
+job providers (and they derive from the JobProvider class).  The thread
+pool has a method to **poke** awake a blocked idle thread, and job
+providers are recommended to call this method when they make new jobs
+available.
+
+Worker jobs are not allowed to block except when absolutely necessary
+for data locking. If a job becomes blocked, the work function is
+expected to drop that job so the worker thread may go back to the pool
+and find more work.
+
+On Windows, the native APIs offer sufficient functionality to discover
+the NUMA topology and enforce the thread affinity that libx265 needs (so
+long as you have not chosen to target XP or Vista), but on POSIX systems
+it relies on libnuma for this functionality. If your target POSIX system
+is single socket, then building without libnuma is a perfectly
+reasonable option, as it will have no effect on the runtime behavior. On
+a multiple-socket system, a POSIX build of libx265 without libnuma will
+be less work efficient, but will still function correctly. You lose the
+work isolation effect that keeps each frame encoder using only the
+threads of a single socket, and so you incur a heavier context switching
+cost.
+
+Wavefront Parallel Processing
+=============================
+
+New with HEVC, Wavefront Parallel Processing allows each row of CTUs to
+be encoded in parallel, so long as each row stays at least two CTUs
+behind the row above it, to ensure the intra references and other data
+of the blocks above and above-right are available. WPP has almost no
+effect on the analysis and compression of each CTU and so it has a very
+small impact on compression efficiency relative to slices or tiles. The
+compression loss from WPP has been found to be less than 1% in most of
+our tests.
+
+WPP has three effects which can impact efficiency. The first is the row
+starts must be signaled in the slice header, the second is each row must
+be padded to an even byte in length, and the third is the state of the
+entropy coder is transferred from the second CTU of each row to the
+first CTU of the row below it.  In some conditions this transfer of
+state actually improves compression since the above-right state may have
+better locality than the end of the previous row.
+
+Parabola Research have published an excellent HEVC
+`animation <http://www.parabolaresearch.com/blog/2013-12-01-hevc-wavefront-animation.html>`_
+which visualizes WPP very well.  It even correctly visualizes some of
+WPP's key drawbacks, such as:
+
+1. the low thread utilization at the start and end of each frame
+2. a difficult block may stall the wave-front and it takes a while for
+   the wave-front to recover.
+3. 64x64 CTUs are big! There are far fewer rows than with H.264 and
+   similar codecs
+
+Because of these stall issues you rarely get the full parallelisation
+benefit one would expect from row threading. 30% to 50% of the
+theoretical perfect threading is typical.
+
+In x265 WPP is enabled by default since it not only improves performance
+at encode but it also makes it possible for the decoder to be threaded.
+
+If WPP is disabled by :option:`--no-wpp` the frame will be encoded in
+scan order and the entropy overheads will be avoided.  If frame
+threading is not disabled, the encoder will change the default frame
+thread count to be higher than if WPP was enabled.  The exact formulas
+are described in the next section.
+
+Bonded Task Groups
+==================
+
+If a worker thread job has work which can be performed in parallel by
+many threads, it may allocate a bonded task group and enlist the help of
+other idle worker threads from the same thread pool. Those threads will
+cooperate to complete the work of the bonded task group and then return
+to their idle states. The larger and more uniform those tasks are, the
+better the bonded task group will perform.
+
+Parallel Mode Analysis
+~~~~~~~~~~~~~~~~~~~~~~
+
+When :option:`--pmode` is enabled, each CU (at all depths from 64x64 to
+8x8) will distribute its analysis work to the thread pool via a bonded
+task group. Each analysis job will measure the cost of one prediction
+for the CU: merge, skip, intra, inter (2Nx2N, Nx2N, 2NxN, and AMP).
+
+At slower presets, the amount of increased parallelism from pmode is
+often enough to be able to reduce or disable frame parallelism while
+achieving the same overall CPU utilization. Reducing frame threads is
+often beneficial to ABR and VBV rate control.
+
+Parallel Motion Estimation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When :option:`--pme` is enabled all of the analysis functions which
+perform motion searches to reference frames will distribute those motion
+searches to other worker threads via a bonded task group (if more than
+two motion searches are required).
+
+Frame Threading
+===============
+
+Frame threading is the act of encoding multiple frames at the same time.
+It is a challenge because each frame will generally use one or more of
+the previously encoded frames as motion references and those frames may
+still be in the process of being encoded themselves.
+
+Previous encoders such as x264 worked around this problem by limiting
+the motion search region within these reference frames to just one
+macroblock row below the coincident row being encoded. Thus a frame
+could be encoded at the same time as its reference frames so long as it
+stayed one row behind the encode progress of its references (glossing
+over a few details). 
+
+x265 has the same frame threading mechanism, but we generally have much
+less frame parallelism to exploit than x264 because of the size of our
+CTU rows. For instance, with 1080p video x264 has 68 16x16 macroblock
+rows available each frame while x265 only has 17 64x64 CTU rows.
+
+The second extenuating circumstance is the loop filters. The pixels used
+for motion reference must be processed by the loop filters and the loop
+filters cannot run until a full row has been encoded, and it must run a
+full row behind the encode process so that the pixels below the row
+being filtered are available. On top of this, HEVC has two loop filters:
+deblocking and SAO, which must be run in series with a row lag between
+them. When you add up all the row lags each frame ends up being 3 CTU
+rows behind its reference frames (the equivalent of 12 macroblock rows
+for x264). And keep in mind the wave-front progression pattern; by the
+time the reference frame finishes the third row of CTUs, nearly half of
+the CTUs in the frame may be compressed (depending on the display aspect
+ratio).
+
+The third extenuating circumstance is that when a frame being encoded
+becomes blocked by a reference frame row not being available, that frame's
+wave-front becomes completely stalled and when the row becomes available
+again it can take quite some time for the wave to be restarted, if it
+ever does. This makes WPP less effective when frame parallelism is in
+use.
+
+:option:`--merange` can have a negative impact on frame parallelism. If
+the range is too large, more rows of CTU lag must be added to ensure
+those pixels are available in the reference frames.
+
+.. note::
+
+	Even though the merange is used to determine the amount of reference
+	pixels that must be available in the reference frames, the actual
+	motion search is not necessarily centered around the coincident
+	block. The motion search is actually centered around the motion
+	predictor, but the available pixel area (mvmin, mvmax) is determined
+	by merange and the interpolation filter half-heights.
+
+When frame threading is disabled, the entirety of all reference frames
+are always fully available (by definition) and thus the available pixel
+area is not restricted at all, and this can sometimes improve
+compression efficiency. Because of this, the output of encodes with
+frame parallelism disabled will not match the output of encodes with
+frame parallelism enabled; but when enabled the number of frame threads
+should have no effect on the output bitstream except when using ABR or
+VBV rate control or noise reduction.
+
+When :option:`--nr` is enabled, the outputs of each number of frame threads
+will be deterministic but none of them will match because each frame
+encoder maintains a cumulative noise reduction state.
+
+VBV introduces non-determinism in the encoder, at this point in time,
+regardless of the amount of frame parallelism.
+
+By default frame parallelism and WPP are enabled together. The number of
+frame threads used is auto-detected from the (hyperthreaded) CPU core
+count, but may be manually specified via :option:`--frame-threads`.
+
+	+-------+--------+
+	| Cores | Frames |
+	+=======+========+
+	|  > 32 |  6..8  |
+	+-------+--------+
+	| >= 16 |   5    |
+	+-------+--------+
+	| >= 8  |   3    |
+	+-------+--------+
+	| >= 4  |   2    |
+	+-------+--------+
+
+If WPP is disabled, then the frame thread count defaults to **min(cpuCount, ctuRows / 2)**
+
+Over-allocating frame threads can be very counter-productive. They
+each allocate a large amount of memory and because of the limited number
+of CTU rows and the reference lag, you generally get limited benefit
+from adding frame encoders beyond the auto-detected count, and often
+the extra frame encoders reduce performance.
+
+Given these considerations, you can understand why the faster presets
+lower the max CTU size to 32x32 (making twice as many CTU rows available
+for WPP and for finer grained frame parallelism) and reduce
+:option:`--merange`.
+
+Each frame encoder runs in its own thread (allocated separately from the
+worker pool). This frame thread has some pre-processing responsibilities
+and some post-processing responsibilities for each frame, but it spends
+the bulk of its time managing the wave-front processing by making CTU
+rows available to the worker threads when their dependencies are
+resolved.  The frame encoder threads spend nearly all of their time
+blocked in one of 4 possible locations:
+
+1. blocked, waiting for a frame to process
+2. blocked on a reference frame, waiting for a CTU row of reconstructed
+   and loop-filtered reference pixels to become available
+3. blocked waiting for wave-front completion
+4. blocked waiting for the main thread to consume an encoded frame
+
+Lookahead
+=========
+
+The lookahead module of x265 (the lowres pre-encode which determines
+scene cuts and slice types) uses the thread pool to distribute the
+lowres cost analysis to worker threads. It will use bonded task groups
+to perform batches of frame cost estimates, and it may optionally use
+bonded task groups to measure single frame cost estimates using slices.
+(see :option:`--lookahead-slices`)
+
+The main slicetypeDecide() function itself is also performed by a worker
+thread if your encoder has a thread pool, else it runs within the
+context of the thread which calls x265_encoder_encode().
+
+SAO
+===
+
+The Sample Adaptive Offset loop filter has a large effect on encode
+performance because of the peculiar way it must be analyzed and coded.
+
+SAO flags and data are encoded at the CTU level before the CTU itself is
+coded, but SAO analysis (deciding whether to enable SAO and with what
+parameters) cannot be performed until that CTU is completely analyzed
+(reconstructed pixels are available) as well as the CTUs to the right
+and below.  So in effect the encoder must perform SAO analysis in a
+wavefront at least a full row behind the CTU compression wavefront.
+
+This extra latency forces the encoder to save the encode data of every
+CTU until the entire frame has been analyzed, at which point a function
+can code the final slice bitstream with the decided SAO flags and data
+interleaved between each CTU.  This second pass over the CTUs can be
+expensive, particularly at large resolutions and high bitrates.
--- a/x265/doc/reST/x265.rst
+++ b/x265/doc/reST/x265.rst
@ -0,0 +1,49 @@
+x265 CLI Documentation
+######################
+
+
+SYNOPSIS
+========
+
+**x265** [options] infile [-o] outfile
+
+Bit depth: 8
+
+
+**x265-10bit** [options] infile [-o] outfile
+
+Bit depth: 10
+
+
+infile can be YUV or Y4M
+
+outfile is raw HEVC bitstream
+
+
+DESCRIPTION
+===========
+
+.. toctree::
+   :maxdepth: 2
+
+   introduction
+
+
+OPTIONS
+=======
+
+.. toctree::
+   :maxdepth: 2
+
+   cli
+   presets
+   lossless
+
+
+SEE ALSO
+========
+
+**libx265**\(3)
+
+Online documentation: http://x265.readthedocs.org/en/default/cli.html
+
--- a/x265/doc/uncrustify/codingstyle.cfg
+++ b/x265/doc/uncrustify/codingstyle.cfg
@ -0,0 +1,232 @@
+align_func_params=true
+align_keep_tabs=false
+align_left_shift=true
+align_mix_var_proto=false
+align_nl_cont=false
+align_number_left=false
+align_oc_decl_colon=false
+align_on_operator=false
+align_on_tabstop=false
+align_right_cmt_mix=false
+align_single_line_brace=false
+align_single_line_func=false
+align_var_def_attribute=false
+align_var_def_colon=false
+align_var_def_inline=false
+align_with_tabs=false
+cmt_c_group=true
+cmt_c_nl_end=false
+cmt_c_nl_start=true
+cmt_cpp_group=true
+cmt_cpp_nl_end=false
+cmt_cpp_nl_start=false
+cmt_cpp_to_c=false
+cmt_indent_multi=false
+cmt_insert_before_preproc=false
+cmt_multi_check_last=true
+cmt_reflow_mode=1
+cmt_sp_before_star_cont=0
+cmt_star_cont=true
+cmt_width=130
+#code_width=130
+eat_blanks_after_open_brace=true
+eat_blanks_before_close_brace=true
+indent_access_spec_body=false
+indent_align_assign=false
+indent_align_string=false
+indent_bool_paren=false
+indent_brace_parent=false
+indent_braces=false
+indent_braces_no_class=false
+indent_braces_no_func=false
+indent_braces_no_struct=false
+indent_class=true
+indent_class_colon=false
+indent_cmt_with_tabs=false
+indent_col1_comment=false
+indent_columns=4
+indent_comma_paren=false
+indent_else_if=false
+indent_extern=false
+indent_first_bool_expr=false
+indent_func_call_param=false
+indent_func_class_param=false
+indent_func_ctor_var_param=false
+indent_func_def_param=false
+indent_func_param_double=false
+indent_func_proto_param=false
+indent_namespace=false
+indent_paren_nl=false
+indent_preserve_sql=false
+indent_relative_single_line_comments=false
+indent_square_nl=false
+indent_template_param=false
+indent_var_def_cont=false
+indent_with_tabs=0
+input_tab_size=2
+ls_for_split_full=true
+ls_func_split_full=true
+mod_add_long_ifdef_else_comment=10
+mod_add_long_ifdef_endif_comment=10
+mod_full_brace_do=add
+mod_full_brace_for=add
+mod_full_brace_if=ignore
+mod_full_brace_if_chain=false
+mod_full_brace_while=add
+mod_full_paren_if_bool=false
+mod_move_case_break=false
+mod_paren_on_return=remove
+mod_pawn_semicolon=false
+mod_remove_empty_return=true
+mod_remove_extra_semicolon=true
+mod_sort_import=false
+mod_sort_include=false
+mod_sort_using=false
+newlines=lf
+nl_after_access_spec=2
+#nl_after_brace_close=ignore
+#nl_after_brace_open=ignore
+nl_after_brace_open_cmt=true
+nl_after_case=false
+nl_after_class=2
+nl_after_for=add
+nl_after_func_body=2
+nl_after_func_body_one_liner=2
+nl_after_if=ignore
+nl_after_multiline_comment=true
+nl_after_return=false
+nl_after_semicolon=true
+nl_after_struct=2
+nl_after_switch=add
+nl_after_vbrace_close=false
+nl_after_vbrace_open=false
+nl_after_vbrace_open_empty=false
+nl_after_while=add
+nl_assign_brace=add
+nl_assign_leave_one_liners=true
+nl_before_access_spec=2
+nl_before_block_comment=2
+nl_before_case=false
+nl_brace_else=add
+nl_brace_while=add
+nl_case_colon_brace=add
+nl_class_brace=add
+nl_class_init_args=ignore
+nl_class_leave_one_liners=true
+nl_collapse_empty_body=false
+nl_create_for_one_liner=false
+nl_create_if_one_liner=false
+nl_create_while_one_liner=false
+nl_define_macro=false
+nl_do_brace=add
+nl_ds_struct_enum_close_brace=false
+nl_ds_struct_enum_cmt=false
+nl_else_brace=add
+nl_else_if=remove
+nl_elseif_brace=add
+nl_end_of_file=add
+nl_end_of_file_min=1
+nl_enum_brace=add
+nl_enum_leave_one_liners=true
+nl_fdef_brace=add
+nl_for_brace=add
+nl_func_decl_end=remove
+nl_func_decl_start=remove
+nl_func_def_paren=remove
+nl_func_def_start=remove
+nl_func_leave_one_liners=true
+nl_func_paren=remove
+nl_func_proto_type_name=remove
+nl_func_type_name=remove
+nl_func_type_name_class=remove
+nl_func_var_def_blk=2
+nl_getset_leave_one_liners=true
+nl_if_brace=add
+nl_if_leave_one_liners=true
+nl_max=2
+nl_multi_line_cond=false
+nl_multi_line_define=false
+nl_namespace_brace=remove
+nl_return_expr=remove
+nl_squeeze_ifdef=false
+nl_start_of_file=remove
+nl_struct_brace=add
+nl_switch_brace=add
+nl_template_class=add
+nl_while_brace=add
+pp_define_at_level=false
+pp_if_indent_code=false
+pp_indent=remove
+pp_indent_at_level=false
+pp_region_indent_code=false
+sp_addr=remove
+sp_after_angle=remove
+sp_after_cast=remove
+sp_after_class_colon=add
+sp_after_comma=add
+sp_after_dc=remove
+sp_after_new=add
+sp_after_operator=add
+sp_after_operator_sym=remove
+sp_after_type=ignore
+sp_angle_paren=remove
+sp_angle_word=add
+sp_arith=add
+sp_assign=add
+sp_assign_default=add
+sp_attribute_paren=remove
+sp_balance_nested_parens=false
+sp_before_angle=remove
+sp_before_case_colon=remove
+sp_before_class_colon=add
+sp_before_comma=remove
+sp_before_dc=remove
+sp_before_nl_cont=add
+sp_before_semi=remove
+sp_before_semi_for=remove
+sp_before_semi_for_empty=remove
+sp_before_sparen=add
+sp_before_square=remove
+sp_before_squares=ignore
+sp_before_tr_emb_cmt=add
+sp_bool=add
+sp_brace_else=add
+sp_cmt_cpp_start=ignore
+sp_compare=add
+sp_cond_colon=add
+sp_cond_question=add
+sp_cpp_cast_paren=remove
+sp_defined_paren=remove
+sp_deref=remove
+sp_else_brace=add
+sp_endif_cmt=add
+sp_enum_assign=add
+sp_fparen_brace=add
+sp_func_call_paren=remove
+sp_func_class_paren=remove
+sp_func_def_paren=remove
+sp_func_proto_paren=remove
+sp_incdec=remove
+sp_inside_angle=remove
+sp_inside_braces=add
+#sp_inside_braces_empty=remove
+sp_inside_fparen=remove
+sp_inside_fparens=remove
+sp_inside_paren=remove
+sp_inside_paren_cast=remove
+sp_inside_sparen=remove
+sp_inside_square=remove
+sp_inv=remove
+sp_member=remove
+sp_not=remove
+sp_paren_brace=add
+sp_paren_paren=remove
+sp_pp_concat=add
+sp_sign=remove
+sp_sizeof_paren=remove
+sp_special_semi=ignore
+sp_template_angle=remove
+tok_split_gte=false
+utf8_bom=remove
+utf8_byte=false
+utf8_force=false
--- a/x265/readme.rst
+++ b/x265/readme.rst
@ -0,0 +1,14 @@
+=================
+x265 HEVC Encoder
+=================
+
+| **Read:** | Online `documentation <http://x265.readthedocs.org/en/default/>`_ | Developer `wiki <http://bitbucket.org/multicoreware/x265/wiki/>`_
+| **Download:** | `releases <http://ftp.videolan.org/pub/videolan/x265/>`_ 
+| **Interact:** | #x265 on irc.freenode.net | `x265-devel@videolan.org <http://mailman.videolan.org/listinfo/x265-devel>`_ | `Report an issue <https://bitbucket.org/multicoreware/x265/issues?status=new&status=open>`_
+
+`x265 <https://www.videolan.org/developers/x265.html>`_ is an open
+source HEVC encoder. See the developer wiki for instructions for
+downloading and building the source.
+
+x265 is free to use under the `GNU GPL <http://www.gnu.org/licenses/gpl-2.0.html>`_ 
+and is also available under a commercial `license <http://x265.org>`_ 
--- a/x265/source/CMakeLists.txt
+++ b/x265/source/CMakeLists.txt
@ -0,0 +1,586 @@
+# vim: syntax=cmake
+#
+# Top-level build script for the x265 project.
+
+# Declare the minimum CMake version first. cmake_minimum_required() resets the
+# policy state, so it must run before the cmake_policy(SET ...) calls below
+# (otherwise it discards them) and before project().
+cmake_minimum_required (VERSION 2.8.8) # OBJECT libraries require 2.8.8
+
+if(NOT CMAKE_BUILD_TYPE)
+    # default to Release build for GCC builds
+    set(CMAKE_BUILD_TYPE Release CACHE STRING
+        "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
+        FORCE)
+endif()
+message(STATUS "cmake version ${CMAKE_VERSION}")
+
+# Each policy is guarded with if(POLICY ...) so that CMake releases that do
+# not know the policy yet still accept this file.
+if(POLICY CMP0025)
+    cmake_policy(SET CMP0025 OLD) # report Apple's Clang as just Clang
+endif()
+if(POLICY CMP0042)
+    cmake_policy(SET CMP0042 NEW) # MACOSX_RPATH
+endif()
+if(POLICY CMP0054)
+    cmake_policy(SET CMP0054 OLD) # Only interpret if() arguments as variables or keywords when unquoted
+endif()
+
+project (x265)
+
+# Configure-time probe helpers used throughout the rest of this file.
+include(CheckIncludeFiles)
+include(CheckFunctionExists)
+include(CheckSymbolExists)
+include(CheckCXXCompilerFlag)
+
+# Profile-guided optimisation, CPU targeting and CRT linkage switches. They
+# are only declared here; the compiler-specific sections of this file turn
+# them into actual flags.
+option(FPROFILE_GENERATE "Compile executable to generate usage data" OFF)
+option(FPROFILE_USE "Compile executable using generated usage data" OFF)
+option(NATIVE_BUILD "Target the build CPU" OFF)
+option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
+# hide the less commonly used switches from the default cache view
+mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
+
+# X265_BUILD must be incremented each time the public API is changed
+set(X265_BUILD 75)
+# Generate x265.def and x265_config.h in the build tree from their templates.
+# NOTE(review): the templates presumably substitute X265_BUILD - confirm
+# against x265.def.in / x265_config.h.in.
+configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
+               "${PROJECT_BINARY_DIR}/x265.def")
+configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
+               "${PROJECT_BINARY_DIR}/x265_config.h")
+
+# Search this project's cmake/ directory first when resolving include() and
+# find_package() modules.
+SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
+
+# System architecture detection
+#
+# Classifies the lower-cased CMAKE_SYSTEM_PROCESSOR as x86, POWER or ARM and
+# sets the matching CMake variable (X86/X64, POWER, ARM) plus a preprocessor
+# define. Unknown processors only produce a status message; no variable is set.
+string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
+set(X86_ALIASES x86 i386 i686 x86_64 amd64)
+list(FIND X86_ALIASES "${SYSPROC}" X86MATCH)
+set(POWER_ALIASES ppc64 ppc64le)
+list(FIND POWER_ALIASES "${SYSPROC}" POWERMATCH)
+# list(FIND) yields -1 when there is no match; an empty processor string is
+# treated as x86 as well.
+if("${SYSPROC}" STREQUAL "" OR X86MATCH GREATER "-1")
+    message(STATUS "Detected x86 target processor")
+    set(X86 1)
+    add_definitions(-DX265_ARCH_X86=1)
+    # 8-byte pointers mean a 64-bit x86 target
+    if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
+        set(X64 1)
+        add_definitions(-DX86_64=1)
+    endif()
+elseif(POWERMATCH GREATER "-1")
+    message(STATUS "Detected POWER target processor")
+    set(POWER 1)
+    add_definitions(-DX265_ARCH_POWER=1)
+elseif(${SYSPROC} STREQUAL "armv6l")
+    message(STATUS "Detected ARM target processor")
+    set(ARM 1)
+    add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1)
+else()
+    message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
+    message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
+endif()
+
+# Collect the system libraries needed on UNIX-like platforms in PLATFORM_LIBS:
+# pthread always, rt and dl when present, and numa when libnuma is usable.
+if(UNIX)
+    list(APPEND PLATFORM_LIBS pthread)
+    find_library(LIBRT rt)
+    if(LIBRT)
+        list(APPEND PLATFORM_LIBS rt)
+    endif()
+    mark_as_advanced(LIBRT)
+    find_library(LIBDL dl)
+    if(LIBDL)
+        list(APPEND PLATFORM_LIBS dl)
+    endif()
+    option(ENABLE_LIBNUMA "Enable libnuma usage (Linux only)" ON)
+    if(ENABLE_LIBNUMA)
+        find_package(Numa)
+        if(NUMA_FOUND)
+            link_directories(${NUMA_LIBRARY_DIR})
+            # link the probe below against libnuma; the entry is not removed
+            # afterwards, so it also applies to later check_* calls
+            list(APPEND CMAKE_REQUIRED_LIBRARIES numa)
+            # NUMA support is only enabled when numa.h provides numa_node_of_cpu
+            check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2)
+            if(NUMA_V2)
+                add_definitions(-DHAVE_LIBNUMA)
+                message(STATUS "libnuma found, building with support for NUMA nodes")
+                list(APPEND PLATFORM_LIBS numa)
+                include_directories(${NUMA_INCLUDE_DIR})
+            endif()
+        endif()
+        mark_as_advanced(NUMA_FOUND)
+    endif(ENABLE_LIBNUMA)
+    option(NO_ATOMICS "Use a slow mutex to replace atomics" OFF)
+    if(NO_ATOMICS)
+        add_definitions(-DNO_ATOMICS=1)
+    endif(NO_ATOMICS)
+endif(UNIX)
+
+# Position independent code defaults to ON for 64-bit non-Windows targets and
+# to OFF everywhere else.
+if(X64 AND NOT WIN32)
+    option(ENABLE_PIC "Enable Position Independent Code" ON)
+else()
+    option(ENABLE_PIC "Enable Position Independent Code" OFF)
+endif(X64 AND NOT WIN32)
+
+# Compiler detection
+#
+# Sets XCODE, CLANG, INTEL_CXX, GCC and MSVC helper variables that the rest
+# of this file uses to select compiler-specific flags.
+if(CMAKE_GENERATOR STREQUAL "Xcode")
+  set(XCODE 1)
+endif()
+if(APPLE)
+  add_definitions(-DMACOS)
+endif()
+
+# The variable name is passed to if() instead of its expansion. if() then
+# dereferences it exactly once, so an empty compiler id compares as false
+# instead of producing a malformed condition, and an id that happens to be
+# the name of another variable (e.g. MSVC) is not dereferenced a second time.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set(CLANG 1)
+endif()
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+    set(INTEL_CXX 1)
+endif()
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    set(GCC 1)
+endif()
+
+if(INTEL_CXX AND WIN32)
+    # treat icl roughly like MSVC
+    set(MSVC 1)
+endif()
+# Flags for MSVC (and for icl on Windows, which is treated as MSVC above).
+if(MSVC)
+    if(STATIC_LINK_CRT)
+        # swap the DLL runtime (/MD) for the static runtime (/MT) in the
+        # release flag sets
+        set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
+        foreach(CompilerFlag ${CompilerFlags})
+            string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
+        endforeach()
+    endif(STATIC_LINK_CRT)
+    add_definitions(/W4)  # Full warnings
+    add_definitions(/Ob2) # always inline
+    add_definitions(/MP)  # multithreaded build
+
+    # disable Microsoft's suggestions for proprietary secure APIs
+    add_definitions(/D_CRT_SECURE_NO_WARNINGS)
+
+    # fall back to the bundled compat/msvc headers when stdint.h is missing
+    check_include_files(stdint.h HAVE_STDINT_H)
+    if(NOT HAVE_STDINT_H)
+        include_directories(compat/msvc)
+    endif()
+endif(MSVC)
+
+# Tell the sources whether inttypes.h is available (checked for all compilers).
+check_include_files(inttypes.h HAVE_INT_TYPES_H)
+if(HAVE_INT_TYPES_H)
+    add_definitions(-DHAVE_INT_TYPES_H=1)
+endif()
+
+# Flags for GCC-compatible compilers. icpc on UNIX and clang are folded into
+# the GCC variable so that they share the flag logic below.
+if(INTEL_CXX AND UNIX)
+    set(GCC 1) # treat icpc roughly like gcc
+elseif(CLANG)
+    set(GCC 1) # treat clang roughly like gcc
+elseif(CMAKE_COMPILER_IS_GNUCXX)
+    set(GCC 1)
+endif()
+if(GCC)
+    add_definitions(-Wall -Wextra -Wshadow)
+    add_definitions(-D__STDC_LIMIT_MACROS=1)
+    if(ENABLE_PIC)
+         add_definitions(-fPIC)
+    endif(ENABLE_PIC)
+    # CPU targeting: the build machine when NATIVE_BUILD is set, otherwise
+    # i686 for 32-bit x86; other targets keep the compiler default
+    if(NATIVE_BUILD)
+        if(INTEL_CXX)
+            add_definitions(-xhost)
+        else()
+            add_definitions(-march=native)
+        endif()
+    elseif(X86 AND NOT X64)
+        add_definitions(-march=i686)
+    endif()
+    if(ARM)
+        add_definitions(-march=armv6 -mfloat-abi=hard -mfpu=vfp)
+    endif()
+    # Profile-guided optimisation, pass 1: instrument the build. The flag is
+    # added to LINKER_OPTIONS as well so that it is also passed at link time.
+    if(FPROFILE_GENERATE)
+        if(INTEL_CXX)
+            add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}")
+            list(APPEND LINKER_OPTIONS "-prof-gen")
+        else()
+            check_cxx_compiler_flag(-fprofile-generate CC_HAS_PROFILE_GENERATE)
+            if(CC_HAS_PROFILE_GENERATE)
+                add_definitions(-fprofile-generate)
+                list(APPEND LINKER_OPTIONS "-fprofile-generate")
+            endif(CC_HAS_PROFILE_GENERATE)
+        endif(INTEL_CXX)
+    endif(FPROFILE_GENERATE)
+    # Profile-guided optimisation, pass 2: build using the collected data.
+    if(FPROFILE_USE)
+        if(INTEL_CXX)
+            add_definitions(-prof-use -prof-dir="${CMAKE_CURRENT_BINARY_DIR}")
+            list(APPEND LINKER_OPTIONS "-prof-use")
+        else()
+            check_cxx_compiler_flag(-fprofile-use CC_HAS_PROFILE_USE)
+            check_cxx_compiler_flag(-fprofile-correction CC_HAS_PROFILE_CORRECTION)
+            check_cxx_compiler_flag(-Wno-error=coverage-mismatch CC_HAS_COVMISMATCH)
+            if(CC_HAS_PROFILE_USE)
+                add_definitions(-fprofile-use)
+                list(APPEND LINKER_OPTIONS "-fprofile-use")
+            endif(CC_HAS_PROFILE_USE)
+            if(CC_HAS_PROFILE_CORRECTION)
+                # auto-correct corrupted counters (happens a lot with x265)
+                add_definitions(-fprofile-correction)
+            endif(CC_HAS_PROFILE_CORRECTION)
+            if(CC_HAS_COVMISMATCH)
+                # ignore coverage mismatches (also happens a lot)
+                add_definitions(-Wno-error=coverage-mismatch)
+            endif(CC_HAS_COVMISMATCH)
+        endif(INTEL_CXX)
+    endif(FPROFILE_USE)
+    if(STATIC_LINK_CRT)
+        add_definitions(-static)
+        list(APPEND LINKER_OPTIONS "-static")
+    endif(STATIC_LINK_CRT)
+    # The first two results are only probed here, not used in this block.
+    # NOTE(review): presumably consumed by the subdirectory scripts - confirm.
+    check_cxx_compiler_flag(-Wno-strict-overflow CC_HAS_NO_STRICT_OVERFLOW)
+    check_cxx_compiler_flag(-Wno-narrowing CC_HAS_NO_NARROWING) 
+    check_cxx_compiler_flag(-Wno-array-bounds CC_HAS_NO_ARRAY_BOUNDS) 
+    if (CC_HAS_NO_ARRAY_BOUNDS)
+        add_definitions(-Wno-array-bounds) # these are unhelpful
+    endif()
+    check_cxx_compiler_flag(-ffast-math CC_HAS_FAST_MATH) 
+    if (CC_HAS_FAST_MATH)
+        add_definitions(-ffast-math)
+    endif()
+    check_cxx_compiler_flag(-mstackrealign CC_HAS_STACK_REALIGN) 
+    if (CC_HAS_STACK_REALIGN)
+        add_definitions(-mstackrealign)
+    endif()
+    # Disable exceptions. Reduce executable size, increase compatibility.
+    check_cxx_compiler_flag(-fno-exceptions CC_HAS_FNO_EXCEPTIONS_FLAG)
+    if(CC_HAS_FNO_EXCEPTIONS_FLAG)
+        add_definitions(-fno-exceptions)
+    endif()
+    set(FSANITIZE "" CACHE STRING "-fsanitize options for GCC/clang")
+    if(FSANITIZE)
+        add_definitions(-fsanitize=${FSANITIZE})
+        # clang and gcc need the sanitize options to be passed at link
+        # time so the appropriate ASAN/TSAN runtime libraries can be
+        # linked.
+        list(APPEND LINKER_OPTIONS "-fsanitize=${FSANITIZE}")
+    endif()
+    option(ENABLE_AGGRESSIVE_CHECKS "Enable stack protection and -ftrapv" OFF)
+    if(ENABLE_AGGRESSIVE_CHECKS)
+        # use with care, -ftrapv can cause testbench SIGILL exceptions
+        # since it is testing corner cases of signed integer math
+        add_definitions(-DUSING_FTRAPV=1)
+        check_cxx_compiler_flag(-fsanitize=undefined-trap CC_HAS_CATCH_UNDEFINED) # clang
+        check_cxx_compiler_flag(-ftrapv CC_HAS_FTRAPV)                            # gcc
+        check_cxx_compiler_flag(-fstack-protector-all CC_HAS_STACK_PROTECT)       # gcc
+        if(CC_HAS_FTRAPV)
+            add_definitions(-ftrapv)
+        endif()
+        if(CC_HAS_CATCH_UNDEFINED)
+            add_definitions(-fsanitize=undefined-trap -fsanitize-undefined-trap-on-error)
+        endif()
+        if(CC_HAS_STACK_PROTECT)
+            add_definitions(-fstack-protector-all)
+            # MinGW additionally links the ssp library for stack protection
+            if(MINGW)
+                list(APPEND PLATFORM_LIBS ssp)
+            endif()
+        endif()
+    endif(ENABLE_AGGRESSIVE_CHECKS)
+    # record the compiler's own version string in CC_VERSION
+    execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE CC_VERSION)
+endif(GCC)
+
+# Assembly primitives are enabled by default only for x86 targets with
+# Yasm 1.2.0 or later; in every other case ENABLE_ASSEMBLY defaults to OFF.
+find_package(Yasm)
+if(YASM_FOUND AND X86)
+    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
+        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
+        option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" OFF)
+    else()
+        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
+        option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
+    endif()
+else()
+    option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" OFF)
+endif()
+
+# no need to have slow encoding on x86
+# (configuration is aborted when an x86 build would lack assembly)
+if(X86 AND NOT ENABLE_ASSEMBLY)
+    message(FATAL_ERROR "Yasm 1.2.0 or later must be installed")
+endif()
+
+option(CHECKED_BUILD "Enable run-time sanity checks (debugging)" OFF)
+if(CHECKED_BUILD)
+    add_definitions(-DCHECKED_BUILD=1)
+endif()
+
+# Build options
+#
+# Install locations plus user-supplied extra libraries and linker flags.
+set(LIB_INSTALL_DIR lib CACHE STRING "Install location of libraries")
+set(BIN_INSTALL_DIR bin CACHE STRING "Install location of executables")
+set(EXTRA_LIB "" CACHE STRING "Extra libraries to link against")
+set(EXTRA_LINK_FLAGS "" CACHE STRING "Extra link flags")
+if(EXTRA_LINK_FLAGS)
+    list(APPEND LINKER_OPTIONS ${EXTRA_LINK_FLAGS})
+endif()
+
+# These options are only declared in this file.
+# NOTE(review): presumably read by the encoder/common subdirectories when
+# several bit-depth builds are combined - confirm.
+option(LINKED_8BIT  "8bit libx265 is being linked with this library" OFF)
+option(LINKED_10BIT "10bit libx265 is being linked with this library" OFF)
+option(LINKED_12BIT "12bit libx265 is being linked with this library" OFF)
+
+mark_as_advanced(EXTRA_LIB EXTRA_LINK_FLAGS)
+
+# Bit depth selection: 8 bit by default, 10 bit with HIGH_BIT_DEPTH, and
+# 12 bit with HIGH_BIT_DEPTH plus MAIN12. The HIGH_BIT_DEPTH option itself is
+# only offered on 64-bit x86 targets.
+if(X64)
+    # NOTE: We only officially support high-bit-depth compiles of x265
+    # on 64bit architectures. Main10 plus large resolution plus slow
+    # preset plus 32bit address space usually means malloc failure.  You
+    # can disable this if(X64) check if you desperately need a 32bit
+    # build with 10bit/12bit support, but this violates the "shrink wrap
+    # license" so to speak.  If it breaks you get to keep both halves.
+    # You will need to disable assembly manually.
+    option(HIGH_BIT_DEPTH "Store pixel samples as 16bit values (Main10/Main12)" OFF)
+endif(X64)
+if(HIGH_BIT_DEPTH)
+    option(MAIN12 "Support Main12 instead of Main10" OFF)
+    if(MAIN12)
+        add_definitions(-DHIGH_BIT_DEPTH=1 -DX265_DEPTH=12)
+    else()
+        add_definitions(-DHIGH_BIT_DEPTH=1 -DX265_DEPTH=10)
+    endif()
+else(HIGH_BIT_DEPTH)
+    add_definitions(-DHIGH_BIT_DEPTH=0 -DX265_DEPTH=8)
+endif(HIGH_BIT_DEPTH)
+
+# this option can only be used when linking multiple libx265 libraries
+# together, and some alternate API access method is implemented.
+option(EXPORT_C_API "Implement public C programming interface" ON)
+mark_as_advanced(EXPORT_C_API)
+# X265_NS is plain "x265" when the C API is exported; otherwise it carries a
+# bit-depth suffix so that each bit-depth build gets its own X265_NS value.
+if(EXPORT_C_API)
+    set(X265_NS x265)
+    add_definitions(-DEXPORT_C_API=1)
+elseif(HIGH_BIT_DEPTH)
+    if(MAIN12)
+        set(X265_NS x265_12bit)
+    else()
+        set(X265_NS x265_10bit)
+    endif()
+    add_definitions(-DEXPORT_C_API=0)
+else()
+    set(X265_NS x265_8bit)
+    add_definitions(-DEXPORT_C_API=0)
+endif()
+add_definitions(-DX265_NS=${X265_NS})
+
+# Optionally promote compiler warnings to errors (GCC-like and MSVC only).
+option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
+if(WARNINGS_AS_ERRORS)
+    if(GCC)
+        add_definitions(-Werror)
+    elseif(MSVC)
+        add_definitions(/WX)
+    endif()
+endif(WARNINGS_AS_ERRORS)
+
+# Windows-specific setup: optional Visual Leak Detector and the targeted
+# Windows API level.
+if(WIN32)
+    # Visual leak detector
+    find_package(VLD QUIET)
+    if(VLD_FOUND)
+        add_definitions(-DHAVE_VLD)
+        include_directories(${VLD_INCLUDE_DIRS})
+        list(APPEND PLATFORM_LIBS ${VLD_LIBRARIES})
+        link_directories(${VLD_LIBRARY_DIRS})
+    endif()
+    option(WINXP_SUPPORT "Make binaries compatible with Windows XP and Vista" OFF)
+    if(WINXP_SUPPORT)
+        # force use of workarounds for CONDITION_VARIABLE and atomic
+        # intrinsics introduced after XP
+        add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP -D_WIN32_WINNT_WIN7=0x0601)
+    else(WINXP_SUPPORT)
+        # default to targeting Windows 7 for the NUMA APIs
+        add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WIN7)
+    endif(WINXP_SUPPORT)
+endif()
+
+include(version) # determine X265_VERSION and X265_LATEST_TAG
+# Headers are looked up in the source root, common/, encoder/ and the build
+# tree (which holds the generated x265_config.h).
+include_directories(. common encoder "${PROJECT_BINARY_DIR}")
+
+# Optional profiling instrumentation; each block adds a define, the
+# libraries to link and the matching profile/ subdirectory.
+option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
+if(ENABLE_PPA)
+    add_definitions(-DENABLE_PPA)
+    list(APPEND PLATFORM_LIBS PPA)
+    if(UNIX)
+        list(APPEND PLATFORM_LIBS dl)
+    endif(UNIX)
+    add_subdirectory(profile/PPA)
+endif(ENABLE_PPA)
+
+option(ENABLE_VTUNE "Enable Vtune profiling instrumentation" OFF)
+if(ENABLE_VTUNE)
+    add_definitions(-DENABLE_VTUNE)
+    # the VTune install location is taken from the environment
+    include_directories($ENV{VTUNE_AMPLIFIER_XE_2015_DIR}/include)
+    list(APPEND PLATFORM_LIBS vtune)
+    link_directories($ENV{VTUNE_AMPLIFIER_XE_2015_DIR}/lib64)
+    if(WIN32)
+        list(APPEND PLATFORM_LIBS libittnotify.lib)
+    else()
+        list(APPEND PLATFORM_LIBS libittnotify.a dl)
+    endif()
+    add_subdirectory(profile/vtune)
+endif(ENABLE_VTUNE)
+
+option(DETAILED_CU_STATS "Enable internal profiling of encoder work" OFF)
+if(DETAILED_CU_STATS)
+    add_definitions(-DDETAILED_CU_STATS)
+endif(DETAILED_CU_STATS)
+
+# These subdirectories define the "encoder" and "common" object libraries
+# referenced below through $<TARGET_OBJECTS:...>.
+add_subdirectory(encoder)
+add_subdirectory(common)
+
+# For the Visual Studio and Xcode generators the assembly sources are built
+# through explicit custom commands that invoke yasm on each file.
+if((MSVC_IDE OR XCODE) AND ENABLE_ASSEMBLY)
+    # this is required because of this cmake bug
+    # http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170
+    if(WIN32)
+        set(SUFFIX obj)
+    else()
+        set(SUFFIX o)
+    endif()
+    # MSVC_ASMS is not set in this file.
+    # NOTE(review): presumably provided by the common/ subdirectory - confirm.
+    foreach(ASM ${MSVC_ASMS})
+        set(YASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/x86/${ASM})
+        list(APPEND YASM_SRCS ${YASM_SRC})
+        list(APPEND YASM_OBJS ${ASM}.${SUFFIX})
+        add_custom_command(
+            OUTPUT ${ASM}.${SUFFIX}
+            COMMAND ${YASM_EXECUTABLE} ARGS ${YASM_FLAGS} ${YASM_SRC} -o ${ASM}.${SUFFIX}
+            DEPENDS ${YASM_SRC})
+    endforeach()
+endif()
+
+# Static library assembled from the encoder and common object libraries plus
+# any separately assembled objects.
+source_group(ASM FILES ${YASM_SRCS})
+add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS})
+# outside MSVC the target is named "x265"; under MSVC the name stays
+# x265-static
+if(NOT MSVC)
+    set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265)
+endif()
+if(EXTRA_LIB)
+    target_link_libraries(x265-static ${EXTRA_LIB})
+endif()
+install(TARGETS x265-static
+    LIBRARY DESTINATION ${LIB_INSTALL_DIR}
+    ARCHIVE DESTINATION ${LIB_INSTALL_DIR})
+install(FILES x265.h "${PROJECT_BINARY_DIR}/x265_config.h" DESTINATION include)
+
+# When a resource compiler is available, generate x265.rc with the version
+# numbers derived from the latest tag.
+if(CMAKE_RC_COMPILER)
+    # The resource compiler does not need CFLAGS or macro defines. It
+    # often breaks them
+    string(REPLACE "<FLAGS>" "" CMAKE_RC_COMPILE_OBJECT "${CMAKE_RC_COMPILE_OBJECT}")
+    string(REPLACE "<DEFINES>" "" CMAKE_RC_COMPILE_OBJECT "${CMAKE_RC_COMPILE_OBJECT}")
+
+    # convert X265_LATEST_TAG (ex: 0.7) and X265_TAG_DISTANCE (ex: 103) to
+    # @X265_VERSION_MAJOR@,@X265_VERSION_MINOR@,@X265_BRANCH_ID@,@X265_TAG_DISTANCE@
+    # (the tag is split on "." into a list; elements 0 and 1 are major/minor)
+    string(REPLACE "." ";" VERSION_LIST "${X265_LATEST_TAG}")
+    list(GET VERSION_LIST 0 X265_VERSION_MAJOR)
+    list(GET VERSION_LIST 1 X265_VERSION_MINOR)
+    set(X265_BRANCH_ID 0) # TODO: 0 - stable, 1 - default or other
+    set(X265_RC_FILE "${CMAKE_CURRENT_BINARY_DIR}/x265.rc")
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/x265.rc.in" "${X265_RC_FILE}" @ONLY)
+endif()
+
+# "clean-generated" runs cmake/clean-generated.cmake in script mode; it is
+# not defined for the Visual Studio IDE and Xcode generators.
+if(NOT (MSVC_IDE OR XCODE))
+    add_custom_target(clean-generated COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/clean-generated.cmake)
+endif()
+
+# Optional shared library, built from the encoder and common object
+# libraries, the generated x265.def and (when present) the resource file.
+option(ENABLE_SHARED "Build shared library" ON)
+if(ENABLE_SHARED)
+    add_library(x265-shared SHARED "${PROJECT_BINARY_DIR}/x265.def" ${YASM_OBJS}
+                ${X265_RC_FILE} $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common>)
+    target_link_libraries(x265-shared ${PLATFORM_LIBS})
+    if(MSVC)
+        set_target_properties(x265-shared PROPERTIES OUTPUT_NAME libx265)
+    else()
+        set_target_properties(x265-shared PROPERTIES OUTPUT_NAME x265)
+    endif()
+    if(UNIX)
+        # on UNIX the file version follows the API build number
+        set_target_properties(x265-shared PROPERTIES VERSION ${X265_BUILD})
+        if(APPLE)
+            set_target_properties(x265-shared PROPERTIES MACOSX_RPATH 1)
+        else()
+            list(APPEND LINKER_OPTIONS "-Wl,-Bsymbolic,-znoexecstack")
+        endif()
+    endif()
+    set_target_properties(x265-shared PROPERTIES SOVERSION ${X265_BUILD})
+    if(X265_LATEST_TAG)
+        # WIN32 is the variable CMake defines for Windows targets; the
+        # previously tested WINDOWS is never set, which left this branch dead.
+        if(WIN32)
+            set_target_properties(x265-shared PROPERTIES VERSION ${X265_LATEST_TAG})
+        endif()
+        # shared library is not installed if a tag is not found
+        install(TARGETS x265-shared
+                LIBRARY DESTINATION ${LIB_INSTALL_DIR}
+                ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
+                RUNTIME DESTINATION ${BIN_INSTALL_DIR})
+    endif()
+    if(EXTRA_LIB)
+        target_link_libraries(x265-shared ${EXTRA_LIB})
+    endif()
+    if(LINKER_OPTIONS)
+        # set_target_properties can't do list expansion
+        string(REPLACE ";" " " LINKER_OPTION_STR "${LINKER_OPTIONS}")
+        set_target_properties(x265-shared PROPERTIES LINK_FLAGS "${LINKER_OPTION_STR}")
+    endif()
+endif()
+
+# Generate and install x265.pc; this only happens when a release tag is known.
+if(X265_LATEST_TAG)
+    # convert lists of link libraries into -lstdc++ -lm etc..
+    # (existing absolute paths are kept as they are; everything else gets -l)
+    foreach(LIB ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES} ${PLATFORM_LIBS})
+        if(IS_ABSOLUTE ${LIB} AND EXISTS ${LIB})
+            list(APPEND PLIBLIST "${LIB}")
+        else()
+            list(APPEND PLIBLIST "-l${LIB}")
+        endif()
+    endforeach()
+    if(PLIBLIST)
+        # blacklist of libraries that should not be in Libs.private
+        list(REMOVE_ITEM PLIBLIST "-lc" "-lpthread")
+        string(REPLACE ";" " " PRIVATE_LIBS "${PLIBLIST}")
+    else()
+        set(PRIVATE_LIBS "")
+    endif(PLIBLIST)
+
+    # Produce a pkg-config file
+    configure_file("x265.pc.in" "x265.pc" @ONLY)
+    install(FILES       "${CMAKE_CURRENT_BINARY_DIR}/x265.pc"
+            DESTINATION "${LIB_INSTALL_DIR}/pkgconfig")
+endif()
+
+# "uninstall" target for non-Windows builds: runs the uninstall script that
+# is configured into the build tree here.
+if(NOT WIN32)
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
+                   "${CMAKE_CURRENT_BINARY_DIR}/cmake/cmake_uninstall.cmake"
+                   IMMEDIATE @ONLY)
+    add_custom_target(uninstall
+                      "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake/cmake_uninstall.cmake")
+endif()
+
+# Main CLI application
+#
+# Builds the "cli" target (installed under the name x265) from the input and
+# output helpers plus x265.cpp, linked against the static or shared library.
+set(ENABLE_CLI ON CACHE BOOL "Build standalone CLI application")
+if(ENABLE_CLI)
+    file(GLOB InputFiles input/input.cpp input/yuv.cpp input/y4m.cpp input/*.h)
+    file(GLOB OutputFiles output/output.cpp output/reconplay.cpp output/*.h
+                          output/yuv.cpp output/y4m.cpp # recon
+                          output/raw.cpp)               # muxers
+    source_group(input FILES ${InputFiles})
+    source_group(output FILES ${OutputFiles})
+
+    # use the bundled getopt implementation when the system has no getopt.h
+    check_include_files(getopt.h HAVE_GETOPT_H)
+    if(NOT HAVE_GETOPT_H)
+        if(MSVC)
+            set_source_files_properties(compat/getopt/getopt.c PROPERTIES COMPILE_FLAGS "/wd4100 /wd4131 -DHAVE_STRING_H=1")
+        endif(MSVC)
+        include_directories(compat/getopt)
+        set(GETOPT compat/getopt/getopt.c compat/getopt/getopt.h)
+    endif(NOT HAVE_GETOPT_H)
+    if(WIN32)
+        set(ExportDefs "${PROJECT_BINARY_DIR}/x265.def")
+    endif(WIN32)
+
+    if(XCODE)
+        # Xcode seems unable to link the CLI with libs, so link as one target
+        add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
+                       x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp
+                       $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS})
+    else()
+        add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
+                       ${ExportDefs} x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp)
+        if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX)
+            # The CLI cannot link to the shared library on Windows, it
+            # requires internal APIs not exported from the DLL
+            target_link_libraries(cli x265-static ${PLATFORM_LIBS})
+        else()
+            target_link_libraries(cli x265-shared ${PLATFORM_LIBS})
+        endif()
+    endif()
+    set_target_properties(cli PROPERTIES OUTPUT_NAME x265)
+    if(LINKER_OPTIONS)
+        # set_target_properties can't do list expansion
+        string(REPLACE ";" " " LINKER_OPTION_STR "${LINKER_OPTIONS}")
+        set_target_properties(cli PROPERTIES LINK_FLAGS "${LINKER_OPTION_STR}")
+    endif()
+
+    install(TARGETS cli DESTINATION ${BIN_INSTALL_DIR})
+endif(ENABLE_CLI)
+
+# The unit tests are only offered when assembly is enabled and the generator
+# is not Xcode.
+if(ENABLE_ASSEMBLY AND NOT XCODE)
+    option(ENABLE_TESTS "Enable Unit Tests" OFF)
+    if(ENABLE_TESTS)
+        add_subdirectory(test)
+    endif()
+endif()
--- a/x265/source/cmake/CMakeASM_YASMInformation.cmake
+++ b/x265/source/cmake/CMakeASM_YASMInformation.cmake
@ -0,0 +1,68 @@
+# Language information for the "ASM_YASM" dialect: selects the yasm object
+# format and preprocessor defines, and overrides the compile rule.
+set(ASM_DIALECT "_YASM")
+set(CMAKE_ASM${ASM_DIALECT}_SOURCE_FILE_EXTENSIONS asm)
+
+# ARGS holds the object format (-f) and machine (-m) switches for the target
+# platform; ASM_FLAGS collects the -D defines.
+if(X64)
+    list(APPEND ASM_FLAGS -DARCH_X86_64=1)
+    if(ENABLE_PIC)
+        list(APPEND ASM_FLAGS -DPIC)
+    endif()
+    if(APPLE)
+        set(ARGS -f macho64 -m amd64 -DPREFIX)
+    elseif(UNIX AND NOT CYGWIN)
+        set(ARGS -f elf64 -m amd64)
+    else()
+        set(ARGS -f win64 -m amd64)
+    endif()
+else()
+    list(APPEND ASM_FLAGS -DARCH_X86_64=0)
+    if(APPLE)
+        set(ARGS -f macho -DPREFIX)
+    elseif(UNIX AND NOT CYGWIN)
+        set(ARGS -f elf32)
+    else()
+        set(ARGS -f win32 -DPREFIX)
+    endif()
+endif()
+
+if(GCC)
+    list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=1)
+else()
+    list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=0)
+endif()
+
+# The bit depth and X265_NS defines follow the same HIGH_BIT_DEPTH / MAIN12
+# switches as the C++ build.
+if(HIGH_BIT_DEPTH)
+    if(MAIN12)
+        list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=12 -DX265_NS=${X265_NS})
+    else()
+        list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 -DX265_NS=${X265_NS})
+    endif()
+else()
+    list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -DX265_NS=${X265_NS})
+endif()
+
+# append user-supplied yasm flags, then the ones for the active build type
+list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS}")
+
+if(CMAKE_BUILD_TYPE MATCHES Release)
+    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_RELEASE}")
+elseif(CMAKE_BUILD_TYPE MATCHES Debug)
+    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_DEBUG}")
+elseif(CMAKE_BUILD_TYPE MATCHES MinSizeRel)
+    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_MINSIZEREL}")
+elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
+    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_RELWITHDEBINFO}")
+endif()
+
+# publish the complete flag list to the parent scope as YASM_FLAGS
+set(YASM_FLAGS ${ARGS} ${ASM_FLAGS} PARENT_SCOPE)
+string(REPLACE ";" " " CMAKE_ASM_YASM_COMPILER_ARG1 "${ARGS}")
+
+# This section exists to override the one in CMakeASMInformation.cmake
+# (the default Information file). This removes the <FLAGS>
+# thing so that your C compiler flags that have been set via
+# set_target_properties don't get passed to yasm and confuse it.
+if(NOT CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT)
+    string(REPLACE ";" " " STR_ASM_FLAGS "${ASM_FLAGS}")
+    set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT "<CMAKE_ASM${ASM_DIALECT}_COMPILER> ${STR_ASM_FLAGS} -o <OBJECT> <SOURCE>")
+endif()
+
+include(CMakeASMInformation)
+# clear the dialect again so it does not leak into other ASM language setup
+set(ASM_DIALECT)
--- a/x265/source/cmake/CMakeDetermineASM_YASMCompiler.cmake
+++ b/x265/source/cmake/CMakeDetermineASM_YASMCompiler.cmake
@ -0,0 +1,5 @@
+# Compiler determination for the "ASM_YASM" dialect: points the generic ASM
+# detection at the yasm executable.
+set(ASM_DIALECT "_YASM")
+# YASM_EXECUTABLE is not set in this file.
+# NOTE(review): presumably set by FindYasm.cmake before this runs - confirm.
+set(CMAKE_ASM${ASM_DIALECT}_COMPILER ${YASM_EXECUTABLE})
+set(CMAKE_ASM${ASM_DIALECT}_COMPILER_INIT ${_CMAKE_TOOLCHAIN_PREFIX}yasm)
+include(CMakeDetermineASMCompiler)
+# clear the dialect again after the generic module has run
+set(ASM_DIALECT)
--- a/x265/source/cmake/CMakeTestASM_YASMCompiler.cmake
+++ b/x265/source/cmake/CMakeTestASM_YASMCompiler.cmake
@ -0,0 +1,3 @@
+# Compiler test for the "ASM_YASM" dialect: delegates to the generic ASM
+# compiler test with the dialect suffix set for the duration of the include.
+set(ASM_DIALECT "_YASM")
+include(CMakeTestASMCompiler)
+set(ASM_DIALECT)
--- a/x265/source/cmake/FindNuma.cmake
+++ b/x265/source/cmake/FindNuma.cmake
@ -0,0 +1,43 @@
+# Module for locating libnuma
+#
+# Read-only variables:
+#   NUMA_FOUND
+#     Indicates that the library has been found.
+#
+#   NUMA_INCLUDE_DIR
+#     Points to the libnuma include directory.
+#
+#   NUMA_LIBRARY_DIR
+#     Points to the directory that contains the libraries.
+#     The content of this variable can be passed to link_directories.
+#
+#   NUMA_LIBRARY
+#     Points to the libnuma that can be passed to target_link_libraries.
+#
+# Copyright (c) 2015 Steve Borho
+
+include(FindPackageHandleStandardArgs)
+
+# Installation prefix: a directory containing include/numa.h, with the
+# NUMA_ROOT environment variable as an additional search location.
+find_path(NUMA_ROOT_DIR
+  NAMES include/numa.h
+  PATHS ENV NUMA_ROOT
+  DOC "NUMA root directory")
+
+find_path(NUMA_INCLUDE_DIR
+  NAMES numa.h
+  HINTS ${NUMA_ROOT_DIR}
+  PATH_SUFFIXES include
+  DOC "NUMA include directory")
+
+find_library(NUMA_LIBRARY
+  NAMES numa
+  HINTS ${NUMA_ROOT_DIR}
+  DOC "NUMA library")
+
+# derive the library directory from the full library path
+if (NUMA_LIBRARY)
+    get_filename_component(NUMA_LIBRARY_DIR ${NUMA_LIBRARY} PATH)
+endif()
+
+mark_as_advanced(NUMA_INCLUDE_DIR NUMA_LIBRARY_DIR NUMA_LIBRARY)
+
+# The package name matches the find_package(Numa) spelling so that CMake does
+# not warn about a name mismatch. The upper-case NUMA_FOUND result variable
+# documented above is still set.
+find_package_handle_standard_args(Numa REQUIRED_VARS NUMA_ROOT_DIR NUMA_INCLUDE_DIR NUMA_LIBRARY)
--- a/x265/source/cmake/FindVLD.cmake
+++ b/x265/source/cmake/FindVLD.cmake
@ -0,0 +1,126 @@
+# Module for locating Visual Leak Detector.
+#
+# Customizable variables:
+#   VLD_ROOT_DIR
+#     This variable points to the Visual Leak Detector root directory. By
+#     default, the module looks for the installation directory by examining the
+#     Program Files/Program Files (x86) folders and the VLDROOT environment
+#     variable.
+#
+# Read-only variables:
+#   VLD_FOUND
+#     Indicates that the library has been found.
+#
+#   VLD_INCLUDE_DIRS
+#     Points to the Visual Leak Detector include directory.
+#
+#   VLD_LIBRARY_DIRS
+#     Points to the Visual Leak Detector directory that contains the libraries.
+#     The content of this variable can be passed to link_directories.
+#
+#   VLD_LIBRARIES
+#     Points to the Visual Leak Detector libraries that can be passed to
+#     target_link_libraries.
+#
+#
+# Copyright (c) 2012 Sergiu Dotenco
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+INCLUDE (FindPackageHandleStandardArgs)
+
+# Library sub-directories to try below the VLD root.
+SET (_VLD_POSSIBLE_LIB_SUFFIXES lib)
+
+# Version 2.0 uses vld_x86 and vld_x64 instead of simply vld as library names
+IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+  LIST (APPEND _VLD_POSSIBLE_LIB_SUFFIXES lib/Win32)
+ELSEIF (CMAKE_SIZEOF_VOID_P EQUAL 8)
+  LIST (APPEND _VLD_POSSIBLE_LIB_SUFFIXES lib/Win64)
+ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+
+# Names of the environment variables that hold the Program Files locations.
+# They are stored in CMake variables and referenced indirectly below as
+# $ENV{${...}}, because the parentheses in "ProgramFiles(x86)" cannot be
+# written directly inside $ENV{}.
+SET (PFILES "ProgramFiles")
+SET (PFILES_X86 "ProgramFiles(x86)") # hack to avoid escaping issues in cmake 3.1
+
+# Installation root: VLDROOT from the environment, the Program Files
+# folders, then the uninstall entries in the registry.
+FIND_PATH (VLD_ROOT_DIR
+  NAMES include/vld.h
+  PATHS ENV VLDROOT
+        "$ENV{${PFILES}}/Visual Leak Detector"
+        "$ENV{${PFILES_X86}}/Visual Leak Detector"
+        "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Visual Leak Detector;InstallLocation]"
+        "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Visual Leak Detector;InstallLocation]"
+  DOC "VLD root directory")
+
+FIND_PATH (VLD_INCLUDE_DIR
+  NAMES vld.h
+  HINTS ${VLD_ROOT_DIR}
+  PATH_SUFFIXES include
+  DOC "VLD include directory")
+
+FIND_LIBRARY (VLD_LIBRARY_DEBUG
+  NAMES vld
+  HINTS ${VLD_ROOT_DIR}
+  PATH_SUFFIXES ${_VLD_POSSIBLE_LIB_SUFFIXES}
+  DOC "VLD debug library")
+
+# Extract the VLD version from CHANGES.txt in the installation root, when
+# that file exists. Sets VLD_VERSION_MAJOR/MINOR, VLD_VERSION,
+# VLD_VERSION_COUNT and, for three-part versions, VLD_VERSION_PATCH.
+IF (VLD_ROOT_DIR)
+  SET (_VLD_VERSION_FILE ${VLD_ROOT_DIR}/CHANGES.txt)
+
+  IF (EXISTS ${_VLD_VERSION_FILE})
+    # group 1 captures the whole version string that follows the banner text
+    SET (_VLD_VERSION_REGEX
+      "Visual Leak Detector \\(VLD\\) Version (([0-9]+)\\.([0-9]+)([a-z]|(.([0-9]+)))?)")
+    FILE (STRINGS ${_VLD_VERSION_FILE} _VLD_VERSION_TMP REGEX
+      ${_VLD_VERSION_REGEX})
+
+    STRING (REGEX REPLACE ${_VLD_VERSION_REGEX} "\\1" _VLD_VERSION_TMP
+      "${_VLD_VERSION_TMP}")
+
+    # NOTE(review): the "." separators in the patterns below are unescaped
+    # and therefore match any character - confirm this is acceptable.
+    STRING (REGEX REPLACE "([0-9]+).([0-9]+).*" "\\1" VLD_VERSION_MAJOR
+      "${_VLD_VERSION_TMP}")
+    STRING (REGEX REPLACE "([0-9]+).([0-9]+).*" "\\2" VLD_VERSION_MINOR
+      "${_VLD_VERSION_TMP}")
+
+    SET (VLD_VERSION ${VLD_VERSION_MAJOR}.${VLD_VERSION_MINOR})
+
+    IF ("${_VLD_VERSION_TMP}" MATCHES "^([0-9]+).([0-9]+).([0-9]+)$")
+      # major.minor.patch version numbering scheme
+      STRING (REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\3"
+        VLD_VERSION_PATCH "${_VLD_VERSION_TMP}")
+      SET (VLD_VERSION "${VLD_VERSION}.${VLD_VERSION_PATCH}")
+      SET (VLD_VERSION_COUNT 3)
+    ELSE ("${_VLD_VERSION_TMP}" MATCHES "^([0-9]+).([0-9]+).([0-9]+)$")
+      # major.minor version numbering scheme. The trailing letter is ignored.
+      SET (VLD_VERSION_COUNT 2)
+    ENDIF ("${_VLD_VERSION_TMP}" MATCHES "^([0-9]+).([0-9]+).([0-9]+)$")
+  ENDIF (EXISTS ${_VLD_VERSION_FILE})
+ENDIF (VLD_ROOT_DIR)
+
+# Publish the results. VLD_LIBRARY carries the "debug" keyword so that the
+# library is linked for debug configurations only.
+IF (VLD_LIBRARY_DEBUG)
+  # STRING is a valid cache entry type; the previous "DOC" is not one.
+  SET (VLD_LIBRARY debug ${VLD_LIBRARY_DEBUG} CACHE STRING "VLD library")
+  GET_FILENAME_COMPONENT (_VLD_LIBRARY_DIR ${VLD_LIBRARY_DEBUG} PATH)
+  SET (VLD_LIBRARY_DIR ${_VLD_LIBRARY_DIR} CACHE PATH "VLD library directory")
+ENDIF (VLD_LIBRARY_DEBUG)
+
+# plural result variables documented in the module header
+SET (VLD_INCLUDE_DIRS ${VLD_INCLUDE_DIR})
+SET (VLD_LIBRARY_DIRS ${VLD_LIBRARY_DIR})
+SET (VLD_LIBRARIES ${VLD_LIBRARY})
+
+MARK_AS_ADVANCED (VLD_INCLUDE_DIR VLD_LIBRARY_DIR VLD_LIBRARY_DEBUG VLD_LIBRARY)
+
+# sets VLD_FOUND and prints the standard found / not found message
+FIND_PACKAGE_HANDLE_STANDARD_ARGS (VLD REQUIRED_VARS VLD_ROOT_DIR
+  VLD_INCLUDE_DIR VLD_LIBRARY VERSION_VAR VLD_VERSION)
--- a/x265/source/cmake/FindYasm.cmake
+++ b/x265/source/cmake/FindYasm.cmake
@ -0,0 +1,25 @@
+include(FindPackageHandleStandardArgs)
+
+# Locate the yasm assembler.  YASM_ROOT (environment or CMake variable) is
+# offered as a search hint, with bin/ tried as a path suffix.  Each candidate
+# executable name is listed once.
+find_program(YASM_EXECUTABLE
+    NAMES yasm yasm-1.2.0-win32 yasm-1.2.0-win64 yasm-1.3.0-win32 yasm-1.3.0-win64
+    HINTS $ENV{YASM_ROOT} ${YASM_ROOT}
+    PATH_SUFFIXES bin
+)
+
+if(YASM_EXECUTABLE)
+    # Ask the tool for its version; output that starts with
+    # "yasm <digits and dots>" supplies YASM_VERSION_STRING.
+    execute_process(COMMAND ${YASM_EXECUTABLE} --version
+        OUTPUT_VARIABLE yasm_version
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+    if(yasm_version MATCHES "^yasm ([0-9\\.]*)")
+        set(YASM_VERSION_STRING "${CMAKE_MATCH_1}")
+    endif()
+    unset(yasm_version)
+endif()
+
+# Provide standardized success/failure messages
+find_package_handle_standard_args(yasm
+    REQUIRED_VARS YASM_EXECUTABLE
+    VERSION_VAR YASM_VERSION_STRING)
--- a/x265/source/cmake/clean-generated.cmake
+++ b/x265/source/cmake/clean-generated.cmake
@ -0,0 +1,10 @@
+# Delete the generated files named below from the current binary directory,
+# skipping any that are not there.
+set(generated)
+foreach(name x265.rc x265.pc x265.def x265_config.h)
+  list(APPEND generated "${CMAKE_CURRENT_BINARY_DIR}/${name}")
+endforeach()
+
+foreach(file ${generated})
+  if(EXISTS "${file}")
+    file(REMOVE "${file}")
+  endif()
+endforeach()
--- a/x265/source/cmake/cmake_uninstall.cmake.in
+++ b/x265/source/cmake/cmake_uninstall.cmake.in
@ -0,0 +1,19 @@
+# Uninstall script: removes every file listed in install_manifest.txt,
+# honouring DESTDIR.  The @VAR@ placeholders are presumably filled in when
+# this template is configured.
+if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+    message(FATAL_ERROR "Cannot find install manifest: '@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt'")
+endif()
+
+file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
+string(REGEX REPLACE "\n" ";" files "${files}")
+foreach(file ${files})
+    message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
+    if(EXISTS "$ENV{DESTDIR}${file}" OR IS_SYMLINK "$ENV{DESTDIR}${file}")
+        # execute_process replaces the deprecated exec_program and hands the
+        # path over as one argument, so no manual quoting is needed
+        execute_process(COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
+                        OUTPUT_VARIABLE rm_out
+                        RESULT_VARIABLE rm_retval)
+        if(NOT "${rm_retval}" STREQUAL 0)
+            message(FATAL_ERROR "Problem when removing '$ENV{DESTDIR}${file}'")
+        endif(NOT "${rm_retval}" STREQUAL 0)
+    else()
+        message(STATUS "File '$ENV{DESTDIR}${file}' does not exist.")
+    endif()
+endforeach(file)
--- a/x265/source/cmake/version.cmake
+++ b/x265/source/cmake/version.cmake
@ -0,0 +1,90 @@
+# Work out X265_VERSION, X265_LATEST_TAG and X265_TAG_DISTANCE.  Sources are
+# tried in this order: a Mercurial archive summary (.hg_archival.txt), a
+# Mercurial checkout, then a Git checkout.  Anything a source cannot supply
+# keeps the default assigned below.
+if(CMAKE_VERSION VERSION_LESS "2.8.10")
+    find_program(HG_EXECUTABLE hg)
+else()
+    find_package(Hg QUIET)
+endif()
+find_package(Git QUIET) # present in 2.8.8
+
+# defaults, in case everything below fails
+set(X265_VERSION "unknown")
+set(X265_LATEST_TAG "0.0")
+set(X265_TAG_DISTANCE "0")
+
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../.hg_archival.txt)
+    # read the lines of the archive summary file to extract the version;
+    # each "key: value" line becomes a variable named hg_<key>
+    file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../.hg_archival.txt archive)
+    STRING(REGEX REPLACE "\n" ";" archive "${archive}")
+    foreach(f ${archive})
+        string(FIND "${f}" ": " pos)
+        # FIND yields -1 when the separator is missing; such lines are
+        # skipped instead of being passed on to SUBSTRING
+        if(NOT pos EQUAL -1)
+            string(SUBSTRING "${f}" 0 ${pos} key)
+            string(SUBSTRING "${f}" ${pos} -1 value)
+            string(SUBSTRING "${value}" 2 -1 value)
+            set(hg_${key} ${value})
+        endif()
+    endforeach()
+    if(DEFINED hg_tag)
+        set(X265_VERSION ${hg_tag})
+        set(X265_LATEST_TAG ${hg_tag})
+        set(X265_TAG_DISTANCE "0")
+    elseif(DEFINED hg_node)
+        # untagged archive: also publish the recorded tag and distance so
+        # they do not stay at the "0.0" / "0" defaults
+        if(DEFINED hg_latesttag)
+            set(X265_LATEST_TAG ${hg_latesttag})
+        endif()
+        if(DEFINED hg_latesttagdistance)
+            set(X265_TAG_DISTANCE ${hg_latesttagdistance})
+        endif()
+        string(SUBSTRING "${hg_node}" 0 16 hg_id)
+        set(X265_VERSION "${X265_LATEST_TAG}+${X265_TAG_DISTANCE}-${hg_id}")
+    endif()
+elseif(HG_EXECUTABLE AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../.hg)
+    if(EXISTS "${HG_EXECUTABLE}.bat")
+        # mercurial source installs on Windows require .bat extension
+        set(HG_EXECUTABLE "${HG_EXECUTABLE}.bat")
+    endif()
+    message(STATUS "hg found at ${HG_EXECUTABLE}")
+
+    execute_process(COMMAND
+        ${HG_EXECUTABLE} log -r. --template "{latesttag}"
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE hg_out_tag
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+    execute_process(COMMAND
+        ${HG_EXECUTABLE} log -r. --template "{latesttagdistance}"
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE hg_out_distance
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+    execute_process(
+        COMMAND
+        ${HG_EXECUTABLE} log -r. --template "{node|short}"
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE HG_REVISION_ID
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+
+    # a failing hg invocation leaves its output empty; in that case the
+    # defaults are kept rather than replaced by blank strings
+    if(NOT "${hg_out_tag}" STREQUAL "" AND NOT "${hg_out_distance}" STREQUAL "")
+        set(X265_LATEST_TAG "${hg_out_tag}")
+        set(X265_TAG_DISTANCE "${hg_out_distance}")
+        if(X265_LATEST_TAG MATCHES "^r")
+            string(SUBSTRING "${X265_LATEST_TAG}" 1 -1 X265_LATEST_TAG)
+        endif()
+        if(X265_TAG_DISTANCE STREQUAL "0")
+            set(X265_VERSION "${X265_LATEST_TAG}")
+        else()
+            set(X265_VERSION "${X265_LATEST_TAG}+${X265_TAG_DISTANCE}-${HG_REVISION_ID}")
+        endif()
+    endif()
+elseif(GIT_EXECUTABLE AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../.git)
+    execute_process(
+        COMMAND
+        ${GIT_EXECUTABLE} describe --tags --abbrev=0
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE git_out_tag
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+
+    execute_process(
+        COMMAND
+        ${GIT_EXECUTABLE} describe --tags
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE git_out_describe
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        )
+
+    # "git describe" prints nothing when it fails (e.g. no tags); only
+    # non-empty results replace the defaults
+    if(NOT "${git_out_tag}" STREQUAL "")
+        set(X265_LATEST_TAG "${git_out_tag}")
+    endif()
+    if(NOT "${git_out_describe}" STREQUAL "")
+        set(X265_VERSION "${git_out_describe}")
+    endif()
+endif()
+
+message(STATUS "x265 version ${X265_VERSION}")
--- a/x265/source/common/CMakeLists.txt
+++ b/x265/source/common/CMakeLists.txt
@ -0,0 +1,127 @@
+# vim: syntax=cmake
+
+list(APPEND VFLAGS "-DX265_VERSION=${X265_VERSION}") # VFLAGS collects the defines applied to version.cpp further down
+if(EXTRA_LIB) # the LINKED_nBIT defines are only considered when EXTRA_LIB is set
+    if(LINKED_8BIT)
+        list(APPEND VFLAGS "-DLINKED_8BIT=1")
+    endif(LINKED_8BIT)
+    if(LINKED_10BIT)
+        list(APPEND VFLAGS "-DLINKED_10BIT=1")
+    endif(LINKED_10BIT)
+    if(LINKED_12BIT)
+        list(APPEND VFLAGS "-DLINKED_12BIT=1")
+    endif(LINKED_12BIT)
+endif(EXTRA_LIB)
+
+if(ENABLE_ASSEMBLY) # selects the intrinsic (vec/) and yasm (x86/) primitive sources
+    set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
+    list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1") # version.cpp receives the same define through VFLAGS
+
+    set(SSE3  vec/dct-sse3.cpp)
+    set(SSSE3 vec/dct-ssse3.cpp)
+    set(SSE41 vec/dct-sse41.cpp)
+
+    if(MSVC AND X86)
+        set(PRIMITIVES ${SSE3} ${SSSE3} ${SSE41})
+        set(WARNDISABLE "/wd4100") # unreferenced formal parameter
+        if(INTEL_CXX)
+            add_definitions(/Qwd111) # statement is unreachable
+            add_definitions(/Qwd128) # loop is unreachable
+            add_definitions(/Qwd177) # declared function is unused
+            add_definitions(/Qwd185) # dynamic initialization in unreachable code
+            add_definitions(/Qwd280) # conditional expression is constant
+        endif()
+        if(X64)
+            set_source_files_properties(${SSE3} ${SSSE3} ${SSE41} PROPERTIES COMPILE_FLAGS "${WARNDISABLE}")
+        else()
+            # x64 implies SSE4, so only add /arch:SSE2 if building for Win32
+            set_source_files_properties(${SSE3} ${SSSE3} ${SSE41} PROPERTIES COMPILE_FLAGS "${WARNDISABLE} /arch:SSE2")
+        endif()
+    endif()
+    if(GCC AND X86)
+        if(CLANG)
+            # llvm intrinsic headers cause shadow warnings
+            set(WARNDISABLE "-Wno-shadow -Wno-unused-parameter")
+        else()
+            set(WARNDISABLE "-Wno-unused-parameter")
+        endif()
+        if(INTEL_CXX OR CLANG OR (NOT CC_VERSION VERSION_LESS 4.3)) # otherwise PRIMITIVES stays unset and no dct-sse*.cpp file is added
+            set(PRIMITIVES ${SSE3} ${SSSE3} ${SSE41})
+            set_source_files_properties(${SSE3}  PROPERTIES COMPILE_FLAGS "${WARNDISABLE} -msse3")
+            set_source_files_properties(${SSSE3} PROPERTIES COMPILE_FLAGS "${WARNDISABLE} -mssse3")
+            set_source_files_properties(${SSE41} PROPERTIES COMPILE_FLAGS "${WARNDISABLE} -msse4.1")
+        endif()
+    endif()
+    set(VEC_PRIMITIVES vec/vec-primitives.cpp ${PRIMITIVES})
+    source_group(Intrinsics FILES ${VEC_PRIMITIVES})
+
+    set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) # C++ glue and headers that live in x86/
+    set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm ssd-a.asm mc-a.asm
+               mc-a2.asm pixel-util8.asm blockcopy8.asm
+               pixeladd8.asm dct8.asm) # assembly sources shared by every bit depth
+    if(HIGH_BIT_DEPTH)
+        set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm loopfilter.asm)
+    else()
+        set(A_SRCS ${A_SRCS} sad-a.asm intrapred8.asm intrapred8_allangs.asm ipfilter8.asm loopfilter.asm)
+    endif()
+
+    if(NOT X64)
+        set(A_SRCS ${A_SRCS} pixel-32.asm) # extra source for non-x64 builds only
+    endif()
+
+    if(MSVC_IDE OR XCODE)
+        # MSVC requires custom build rules in the main cmake script for yasm
+        set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "yasm sources")
+        set(A_SRCS) # cleared so the .asm files are not added to this target directly
+    endif()
+
+    enable_language(ASM_YASM)
+
+    foreach(SRC ${A_SRCS} ${C_SRCS}) # prefix each entry with its x86/ directory
+        set(ASM_PRIMITIVES ${ASM_PRIMITIVES} x86/${SRC})
+    endforeach()
+    source_group(Assembly FILES ${ASM_PRIMITIVES})
+endif(ENABLE_ASSEMBLY)
+
+# set_target_properties can't do list expansion
+string(REPLACE ";" " " VERSION_FLAGS "${VFLAGS}") # turn the VFLAGS list into one space-separated flag string
+set_source_files_properties(version.cpp PROPERTIES COMPILE_FLAGS ${VERSION_FLAGS})
+
+check_symbol_exists(strtok_r "string.h" HAVE_STRTOK_R) # probe for strtok_r in string.h
+if(HAVE_STRTOK_R)
+    set_source_files_properties(param.cpp PROPERTIES COMPILE_FLAGS -DHAVE_STRTOK_R=1)
+endif()
+
+if(GCC AND CC_HAS_NO_NARROWING)
+    set_source_files_properties(cpu.cpp PROPERTIES COMPILE_FLAGS -Wno-narrowing) # applied to cpu.cpp only
+endif()
+if(WIN32)
+    set(WINXP winxp.h winxp.cpp) # extra sources added to the library on Windows
+endif(WIN32)
+
+add_library(common OBJECT
+    ${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${WINXP}
+    primitives.cpp primitives.h
+    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
+    constants.cpp constants.h
+    cpu.cpp cpu.h version.cpp
+    threading.cpp threading.h
+    threadpool.cpp threadpool.h
+    wavefront.h wavefront.cpp
+    md5.cpp md5.h
+    bitstream.h bitstream.cpp
+    yuv.cpp yuv.h
+    shortyuv.cpp shortyuv.h
+    picyuv.cpp picyuv.h
+    common.cpp common.h
+    param.cpp param.h
+    frame.cpp frame.h
+    framedata.cpp framedata.h
+    cudata.cpp cudata.h
+    slice.cpp slice.h
+    lowres.cpp lowres.h mv.h 
+    piclist.cpp piclist.h
+    predict.cpp  predict.h
+    scalinglist.cpp scalinglist.h
+    quant.cpp quant.h contexts.h
+    deblock.cpp deblock.h) # OBJECT library: compiled objects only; the three variables on the first source line may be empty
--- a/x265/source/common/bitstream.cpp
+++ b/x265/source/common/bitstream.cpp
@ -0,0 +1,129 @@
+#include "common.h"
+#include "bitstream.h"
+
+using namespace X265_NS;
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244)
+#endif
+
+#define MIN_FIFO_SIZE 1000
+
+/* Start with an empty stream backed by a MIN_FIFO_SIZE byte buffer.  A failed
+ * allocation leaves m_fifo NULL. */
+Bitstream::Bitstream()
+{
+    m_byteAlloc = MIN_FIFO_SIZE;
+    m_fifo = X265_MALLOC(uint8_t, MIN_FIFO_SIZE);
+    resetBits();
+}
+
+/* Append one byte to the FIFO, doubling the buffer first when it is full.
+ * The byte is dropped when there is no buffer at all, or when the larger
+ * buffer cannot be allocated (an error is logged in the latter case). */
+void Bitstream::push_back(uint8_t val)
+{
+    if (!m_fifo)
+        return;
+
+    if (m_byteOccupancy >= m_byteAlloc)
+    {
+        /** reallocate buffer with doubled size */
+        uint8_t *temp = X265_MALLOC(uint8_t, m_byteAlloc * 2);
+        if (temp)
+        {
+            memcpy(temp, m_fifo, m_byteOccupancy);
+            X265_FREE(m_fifo);
+            m_fifo = temp;
+            m_byteAlloc *= 2;
+        }
+        else
+        {
+            /* newline-terminated like the other log messages in this code;
+             * the old buffer and its contents stay valid */
+            x265_log(NULL, X265_LOG_ERROR, "Unable to realloc bitstream buffer\n");
+            return;
+        }
+    }
+    m_fifo[m_byteOccupancy++] = val;
+}
+
+/* Append the numBits least-significant bits of val, most-significant bit
+ * first.  Completed bytes go to the FIFO; leftover bits (fewer than 8) are
+ * held at the top of m_partialByte until a later write completes the byte. */
+void Bitstream::write(uint32_t val, uint32_t numBits)
+{
+    X265_CHECK(numBits <= 32, "numBits out of range\n");
+    X265_CHECK(numBits == 32 || ((val & (~0u << numBits)) == 0), "numBits & val out of range\n");
+
+    uint32_t totalPartialBits = m_partialByteBits + numBits;
+    uint32_t nextPartialBits = totalPartialBits & 7;
+    uint8_t  nextHeldByte = val << (8 - nextPartialBits);
+    uint32_t writeBytes = totalPartialBits >> 3;
+
+    if (writeBytes)
+    {
+        /* topword aligns m_partialByte with the msb of val */
+        uint32_t topword = (numBits - nextPartialBits) & ~7;
+
+        /* m_partialByte is widened before shifting so the shift happens in
+         * unsigned 32-bit arithmetic, and a shift count of 32 (byte-aligned
+         * 32-bit write, held byte empty) is never executed */
+        uint32_t heldBits = topword < 32 ? (uint32_t)m_partialByte << topword : 0;
+        uint32_t write_bits = heldBits | (val >> nextPartialBits);
+
+        /* deliberate fall-through: emit the writeBytes low bytes of
+         * write_bits, most significant first */
+        switch (writeBytes)
+        {
+        case 4: push_back(write_bits >> 24);
+        case 3: push_back(write_bits >> 16);
+        case 2: push_back(write_bits >> 8);
+        case 1: push_back(write_bits);
+        }
+
+        m_partialByte = nextHeldByte;
+        m_partialByteBits = nextPartialBits;
+    }
+    else
+    {
+        /* still short of a full byte: merge below the bits already held */
+        m_partialByte |= nextHeldByte;
+        m_partialByteBits = nextPartialBits;
+    }
+}
+
+void Bitstream::writeByte(uint32_t val)
+{
+    /* Whole bytes are appended directly, so the stream has to be on a byte
+     * boundary here (CABAC is the only expected caller) */
+    X265_CHECK(!m_partialByteBits, "expecting m_partialByteBits = 0\n");
+
+    push_back(val);
+}
+
+/* Pad with one-bits up to the next byte boundary (writes zero bits when the
+ * stream is already aligned) */
+void Bitstream::writeAlignOne()
+{
+    const uint32_t padBits = (8 - m_partialByteBits) & 0x7;
+    const uint32_t allOnes = (1 << padBits) - 1;
+
+    write(allOnes, padBits);
+}
+
+/* Flush the held partial byte as-is; its unused low bits are the zero
+ * padding.  Nothing happens when the stream is already byte-aligned. */
+void Bitstream::writeAlignZero()
+{
+    if (!m_partialByteBits)
+        return;
+
+    push_back(m_partialByte);
+    m_partialByteBits = 0;
+    m_partialByte = 0;
+}
+
+/* A single one-bit followed by zero padding up to the next byte boundary */
+void Bitstream::writeByteAlignment()
+{
+    const uint32_t stopBit = 1;
+
+    write(stopBit, 1);
+    writeAlignZero();
+}
+
+/* Write code as a run of zero bits followed by (code + 1) in binary */
+void SyntaxElementWriter::writeUvlc(uint32_t code)
+{
+    const uint32_t codeNum = code + 1;
+
+    X265_CHECK(codeNum, "writing -1 code, will cause infinite loop\n");
+
+    /* number of bits below the leading one of codeNum */
+    uint32_t prefixLen = 0;
+    for (uint32_t rest = codeNum; rest != 1; rest >>= 1)
+        prefixLen++;
+
+    /* prefix and value are written separately so that neither call has to
+     * carry more than 32 bits */
+    m_bitIf->write(0, prefixLen);
+    m_bitIf->write(codeNum, prefixLen + 1);
+}
--- a/x265/source/common/bitstream.h
+++ b/x265/source/common/bitstream.h
@ -0,0 +1,158 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Author: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_BITSTREAM_H
+#define X265_BITSTREAM_H 1
+
+namespace X265_NS {
+// private namespace
+
+class BitInterface // abstract bit sink; BitCounter and Bitstream below implement it
+{
+public:
+
+    virtual void     write(uint32_t val, uint32_t numBits)  = 0; // output numBits bits taken from val
+    virtual void     writeByte(uint32_t val)                = 0; // output one whole byte
+    virtual void     resetBits()                            = 0; // return to the empty state
+    virtual uint32_t getNumberOfWrittenBits() const         = 0; // bits output since the last reset
+    virtual void     writeAlignOne()                        = 0; // pad to a byte boundary with one-bits
+    virtual void     writeAlignZero()                       = 0; // pad to a byte boundary with zero-bits
+    virtual ~BitInterface() {}
+};
+
+class BitCounter : public BitInterface // counts the bits that would be written; stores no data
+{
+protected:
+
+    uint32_t  m_bitCounter; // running total of bits
+
+public:
+
+    BitCounter() : m_bitCounter(0) {}
+
+    void     write(uint32_t, uint32_t num)  { m_bitCounter += num; } // the value is ignored, only its length counts
+    void     writeByte(uint32_t)            { m_bitCounter += 8;   }
+    void     resetBits()                    { m_bitCounter = 0;    }
+    uint32_t getNumberOfWrittenBits() const { return m_bitCounter; }
+    void     writeAlignOne()                { } // alignment padding is not counted
+    void     writeAlignZero()               { } // alignment padding is not counted
+};
+
+
+class Bitstream : public BitInterface // BitInterface that stores the written bits in a growable byte buffer
+{
+public:
+
+    Bitstream();
+    ~Bitstream()                             { X265_FREE(m_fifo); }
+
+    void     resetBits()                     { m_partialByteBits = m_byteOccupancy = 0; m_partialByte = 0; } // forget written data; the buffer itself is kept
+    uint32_t getNumberOfWrittenBytes() const { return m_byteOccupancy; } // complete bytes only
+    uint32_t getNumberOfWrittenBits()  const { return m_byteOccupancy * 8 + m_partialByteBits; } // includes the pending partial byte
+    const uint8_t* getFIFO() const           { return m_fifo; } // read-only view of the bytes; still owned by this object
+
+    void     write(uint32_t val, uint32_t numBits);
+    void     writeByte(uint32_t val);
+
+    void     writeAlignOne();      // insert one bits until the bitstream is byte-aligned
+    void     writeAlignZero();     // insert zero bits until the bitstream is byte-aligned
+    void     writeByteAlignment(); // insert 1 bit, then pad to byte-align with zero
+
+private:
+
+    uint8_t *m_fifo; // byte buffer, allocated in the constructor and freed in the destructor
+    uint32_t m_byteAlloc; // capacity of m_fifo in bytes
+    uint32_t m_byteOccupancy; // number of complete bytes stored in m_fifo
+    uint32_t m_partialByteBits; // count of bits pending in m_partialByte
+    uint8_t  m_partialByte; // pending bits, filled from the most significant bit down
+
+    void     push_back(uint8_t val); // append one byte, growing m_fifo when it is full
+};
+
+static const uint8_t bitSize[256] = // bitSize[i] == 2 * floor(log2(i)) + 1 for i >= 1 (entry 0 is 1); the bs_size_* helpers index it with code number + 1
+{
+    1, 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7,
+    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+};
+
+/* Code length in bits looked up for an unsigned value; the table has 256
+ * entries and is indexed with val + 1 (no range check is made) */
+static inline int bs_size_ue(unsigned int val)
+{
+    const unsigned int tableIdx = val + 1;
+
+    return bitSize[tableIdx];
+}
+
+/* Like bs_size_ue, extended to larger values: beyond the table's direct
+ * range the upper byte of (val + 1) is looked up and 16 bits are added */
+static inline int bs_size_ue_big(unsigned int val)
+{
+    const unsigned int codeNum = val + 1;
+
+    return (val < 255) ? bitSize[codeNum]
+                       : bitSize[codeNum >> 8] + 16;
+}
+
+/* Code length in bits for a signed value.  The value is first folded to a
+ * non-negative table index: 2 * val for positive val, 1 - 2 * val otherwise. */
+static inline int bs_size_se(int val)
+{
+    const int folded = (val > 0) ? val * 2 : 1 - val * 2;
+
+    if (folded >= 256)
+        return bitSize[folded >> 8] + 16;
+
+    return bitSize[folded];
+}
+
+class SyntaxElementWriter // writes fixed-length, ue/se variable-length and flag elements to a BitInterface
+{
+public:
+
+    BitInterface* m_bitIf; // destination of all writes; starts NULL and is not checked before use
+
+    SyntaxElementWriter() : m_bitIf(NULL) {}
+
+    /* silently discard the name of the syntax element */
+    inline void WRITE_CODE(uint32_t code, uint32_t length, const char *) { writeCode(code, length); }
+    inline void WRITE_UVLC(uint32_t code,                  const char *) { writeUvlc(code); }
+    inline void WRITE_SVLC(int32_t  code,                  const char *) { writeSvlc(code); }
+    inline void WRITE_FLAG(bool flag,                      const char *) { writeFlag(flag); }
+
+    void writeCode(uint32_t code, uint32_t length) { m_bitIf->write(code, length); } // fixed-length field of length bits
+    void writeUvlc(uint32_t code);
+    void writeSvlc(int32_t code)                   { uint32_t ucode = (code <= 0) ? -code << 1 : (code << 1) - 1; writeUvlc(ucode); } // positive c maps to 2c - 1, otherwise to -2c, then written via writeUvlc
+    void writeFlag(bool code)                      { m_bitIf->write(code, 1); } // single bit
+};
+
+}
+
+#endif // ifndef X265_BITSTREAM_H
--- a/x265/source/common/common.cpp
+++ b/x265/source/common/common.cpp
@ -0,0 +1,219 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "slice.h"
+#include "threading.h"
+#include "x265.h"
+
+#if _WIN32
+#include <sys/types.h>
+#include <sys/timeb.h>
+#else
+#include <sys/time.h>
+#endif
+
+namespace X265_NS {
+
+#if CHECKED_BUILD || _DEBUG
+int g_checkFailures;
+#endif
+
+/* Current wall-clock time in microseconds (the Windows path only has
+ * millisecond resolution) */
+int64_t x265_mdate(void)
+{
+#if _WIN32
+    struct timeb now;
+    ftime(&now);
+    const int64_t millis = (int64_t)now.time * 1000 + (int64_t)now.millitm;
+    return millis * 1000;
+#else
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    const int64_t seconds = (int64_t)now.tv_sec;
+    return seconds * 1000000 + (int64_t)now.tv_usec;
+#endif
+}
+
+#define X265_ALIGNBYTES 32
+
+#if _WIN32
+#if defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR)
+#define _aligned_malloc __mingw_aligned_malloc
+#define _aligned_free   __mingw_aligned_free
+#include "malloc.h"
+#endif
+
+/* Windows: allocate size bytes aligned to X265_ALIGNBYTES */
+void *x265_malloc(size_t size)
+{
+    void *aligned = _aligned_malloc(size, X265_ALIGNBYTES);
+
+    return aligned;
+}
+
+/* Windows: release memory obtained from x265_malloc; NULL is ignored */
+void x265_free(void *ptr)
+{
+    if (!ptr)
+        return;
+
+    _aligned_free(ptr);
+}
+
+#else // if _WIN32
+/* Non-Windows: allocate size bytes aligned to X265_ALIGNBYTES, or return
+ * NULL when posix_memalign reports failure */
+void *x265_malloc(size_t size)
+{
+    void *aligned = NULL;
+    const int status = posix_memalign(&aligned, X265_ALIGNBYTES, size);
+
+    return status == 0 ? aligned : NULL;
+}
+
+/* Non-Windows: release memory obtained from x265_malloc; NULL is ignored */
+void x265_free(void *ptr)
+{
+    if (!ptr)
+        return;
+
+    free(ptr);
+}
+
+#endif // if _WIN32
+
+/* Special-purpose helper for the qp to qscale conversion, not a general
+ * exp2(): the input is scaled by -64/6 and offset by 512.5 to form a table
+ * index, which is clamped at both ends. */
+int x265_exp2fix8(double x)
+{
+    const int idx = (int)(x * (-64.f / 6.f) + 512.5f);
+
+    if (idx < 0)
+        return 0;
+    else if (idx > 1023)
+        return 0xffff;
+
+    /* low six bits pick the table entry, the remaining bits are the shift */
+    const int entry = idx & 63;
+    const int shift = idx >> 6;
+    return (x265_exp2_lut[entry] + 256) << shift >> 8;
+}
+
+/* Format one log message and write it to stderr.
+ *
+ * param  - when non-NULL, messages whose level exceeds param->logLevel are
+ *          dropped; with NULL every message is printed
+ * caller - when non-NULL, a "<caller> [<level name>]: " prefix is added
+ * level  - X265_LOG_* value, used for filtering and for the prefix
+ * fmt    - printf-style format for the remaining arguments */
+void general_log(const x265_param* param, const char* caller, int level, const char* fmt, ...)
+{
+    if (param && level > param->logLevel)
+        return;
+
+    const char* levelName = "unknown";
+    switch (level)
+    {
+    case X265_LOG_ERROR:   levelName = "error";   break;
+    case X265_LOG_WARNING: levelName = "warning"; break;
+    case X265_LOG_INFO:    levelName = "info";    break;
+    case X265_LOG_DEBUG:   levelName = "debug";   break;
+    case X265_LOG_FULL:    levelName = "full";    break;
+    default:                                      break;
+    }
+
+    const int bufferSize = 4096;
+    char buffer[bufferSize];
+    int prefixLen = 0;
+
+    if (caller)
+        prefixLen = sprintf(buffer, "%-4s [%s]: ", caller, levelName);
+
+    /* the message body is bounded by the space left after the prefix */
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(buffer + prefixLen, bufferSize - prefixLen, fmt, args);
+    va_end(args);
+
+    fputs(buffer, stderr);
+}
+
+/* Convert an SSIM score to decibels: -10 * log10(1 - ssim), reported as
+ * 100 dB once 1 - ssim drops to 1e-10 or below */
+double x265_ssim2dB(double ssim)
+{
+    const double remainder = 1 - ssim;
+
+    return (remainder <= 0.0000000001) ? 100 : -10.0 * log10(remainder);
+}
+
+/* qscale to qp, the inverse of x265_qp2qScale: qp = 12 + 6 * log2(qScale / 0.85).
+ * The relation between the two is fixed by the standards; qscale grows by
+ * roughly 12% per qp step. */
+double x265_qScale2qp(double qScale)
+{
+    const double log2Ratio = (double)X265_LOG2(qScale / 0.85);
+
+    return 12.0 + 6.0 * log2Ratio;
+}
+
+/* qp to qscale: 0.85 * 2^((qp - 12) / 6) */
+double x265_qp2qScale(double qp)
+{
+    const double exponent = (qp - 12.0) / 6.0;
+
+    return 0.85 * pow(2.0, exponent);
+}
+
+/* Number of samples in one plane: width and height are each shifted right
+ * by the amounts x265_cli_csps lists for this colour space and plane */
+uint32_t x265_picturePlaneSize(int csp, int width, int height, int plane)
+{
+    const int planeWidth = width >> x265_cli_csps[csp].width[plane];
+    const int planeHeight = height >> x265_cli_csps[csp].height[plane];
+
+    return (uint32_t)planeWidth * planeHeight;
+}
+
+/* Read a whole file into a newly allocated, NUL-terminated buffer.
+ *
+ * A '\n' is appended when the file does not already end with one.  The
+ * caller releases the buffer with X265_FREE.
+ *
+ * Returns NULL when filename is NULL, the file cannot be opened, it is
+ * empty, its size cannot be determined, memory cannot be allocated, or the
+ * read comes up short. */
+char* x265_slurp_file(const char *filename)
+{
+    if (!filename)
+        return NULL;
+
+    FILE *fh = fopen(filename, "rb");
+    if (!fh)
+    {
+        x265_log(NULL, X265_LOG_ERROR, "unable to open file %s\n", filename);
+        return NULL;
+    }
+
+    /* ftell() signals failure with -1, so its result is kept signed until it
+     * has been validated; storing it straight into a size_t would hide the
+     * error and let "size + 2" wrap around to a tiny allocation */
+    long fileLen = -1;
+    if (fseek(fh, 0, SEEK_END) == 0)
+        fileLen = ftell(fh);
+    if (fileLen <= 0 || fseek(fh, 0, SEEK_SET) != 0)
+    {
+        fclose(fh);
+        return NULL;
+    }
+
+    size_t fSize = (size_t)fileLen;
+
+    /* two spare bytes: optional trailing newline plus the terminator */
+    char *buf = X265_MALLOC(char, fSize + 2);
+    if (!buf)
+    {
+        x265_log(NULL, X265_LOG_ERROR, "unable to allocate memory\n");
+        fclose(fh);
+        return NULL;
+    }
+
+    const bool bShortRead = fread(buf, 1, fSize, fh) != fSize;
+    fclose(fh);
+
+    if (bShortRead)
+    {
+        /* bail out before looking at buffer contents that were never filled */
+        x265_log(NULL, X265_LOG_ERROR, "unable to read the file\n");
+        X265_FREE(buf);
+        return NULL;
+    }
+
+    if (buf[fSize - 1] != '\n')
+        buf[fSize++] = '\n';
+    buf[fSize] = 0;
+    return buf;
+}
+
+}
--- a/x265/source/common/common.h
+++ b/x265/source/common/common.h
@ -0,0 +1,441 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_COMMON_H
+#define X265_COMMON_H
+
+#include <algorithm>
+#include <climits>
+#include <cmath>
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+#include <ctime>
+
+#include <stdint.h>
+#include <memory.h>
+#include <assert.h>
+
+#include "x265.h"
+
+#if ENABLE_PPA && ENABLE_VTUNE
+#error "PPA and VTUNE cannot both be enabled. Disable one of them."
+#endif
+#if ENABLE_PPA // profiling hooks map onto PPA; hooks without a mapping expand to nothing
+#include "profile/PPA/ppa.h"
+#define ProfileScopeEvent(x) PPAScopeEvent(x)
+#define THREAD_NAME(n,i)
+#define PROFILE_INIT()       PPA_INIT()
+#define PROFILE_PAUSE()
+#define PROFILE_RESUME()
+#elif ENABLE_VTUNE // profiling hooks map onto the VTune wrappers
+#include "profile/vtune/vtune.h"
+#define ProfileScopeEvent(x) VTuneScopeEvent _vtuneTask(x)
+#define THREAD_NAME(n,i)     vtuneSetThreadName(n, i)
+#define PROFILE_INIT()       vtuneInit()
+#define PROFILE_PAUSE()      __itt_pause()
+#define PROFILE_RESUME()     __itt_resume()
+#else // no profiler selected: every hook expands to nothing
+#define ProfileScopeEvent(x)
+#define THREAD_NAME(n,i)
+#define PROFILE_INIT()
+#define PROFILE_PAUSE()
+#define PROFILE_RESUME()
+#endif
+
+#define FENC_STRIDE 64 // presumably the row stride of the source (fenc) block buffer - TODO confirm
+#define NUM_INTRA_MODE 35 // number of intra prediction modes
+
+#if defined(__GNUC__) // ALIGN_VAR_n(T, var): declare var of type T aligned to n bytes
+#define ALIGN_VAR_8(T, var)  T var __attribute__((aligned(8)))
+#define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
+#define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
+
+#if defined(__MINGW32__)
+#define fseeko fseeko64
+#endif
+
+#elif defined(_MSC_VER)
+
+#define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
+#define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
+#define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
+#define fseeko _fseeki64
+
+#endif // if defined(__GNUC__)
+
+#if HAVE_INT_TYPES_H // X265_LL: printf conversion for 64-bit integers
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#define X265_LL "%" PRIu64
+#else
+#define X265_LL "%lld" // NOTE(review): signed here but unsigned (PRIu64) above - confirm which the callers expect
+#endif
+
+#if _DEBUG && defined(_MSC_VER) // DEBUG_BREAK(): stop execution at the point of a failed check
+#define DEBUG_BREAK() __debugbreak()
+#elif __APPLE_CC__
+#define DEBUG_BREAK() __builtin_trap()
+#else
+#define DEBUG_BREAK() abort()
+#endif
+
+/* If compiled with CHECKED_BUILD perform run-time checks and log any that
+ * fail, both to stderr and to a file */
+#if CHECKED_BUILD || _DEBUG
+namespace X265_NS { extern int g_checkFailures; } // incremented once per failed check
+#define X265_CHECK(expr, ...) if (!(expr)) { \
+    x265_log(NULL, X265_LOG_ERROR, __VA_ARGS__); \
+    FILE *fp = fopen("x265_check_failures.txt", "a"); \
+    if (fp) { fprintf(fp, "%s:%d\n", __FILE__, __LINE__); fprintf(fp, __VA_ARGS__); fclose(fp); } \
+    g_checkFailures++; DEBUG_BREAK(); \
+} // NOTE(review): expands to a bare if-block rather than do { } while (0); avoid it as the unbraced body of an if that has an else
+#if _MSC_VER
+#pragma warning(disable: 4127) // some checks have constant conditions
+#endif
+#else // checks disabled: the macro expands to nothing and expr is never evaluated
+#define X265_CHECK(expr, ...)
+#endif
+
+#if HIGH_BIT_DEPTH // sample and accumulator types: 16-bit samples here, 8-bit samples in the #else branch
+typedef uint16_t pixel;
+typedef uint32_t sum_t;
+typedef uint64_t sum2_t;
+typedef uint64_t pixel4; // same width as four pixels
+typedef int64_t  ssum2_t;
+#else
+typedef uint8_t  pixel;
+typedef uint16_t sum_t;
+typedef uint32_t sum2_t;
+typedef uint32_t pixel4; // same width as four pixels
+typedef int32_t  ssum2_t; // Signed sum
+#endif // if HIGH_BIT_DEPTH
+
+#if X265_DEPTH <= 10 // sse_ret_t widens to 64 bits once the bit depth exceeds 10
+typedef uint32_t sse_ret_t;
+#else
+typedef uint64_t sse_ret_t;
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define MAX_UINT        0xFFFFFFFFU // max. value of unsigned 32-bit integer
+#define MAX_INT         2147483647  // max. value of signed 32-bit integer
+#define MAX_INT64       0x7FFFFFFFFFFFFFFFLL  // max. value of signed 64-bit integer
+#define MAX_DOUBLE      1.7e+308    // max. value of double-type value
+
+#define QP_MIN          0
+#define QP_MAX_SPEC     51 /* max allowed signaled QP in HEVC */
+#define QP_MAX_MAX      69 /* max allowed QP to be output by rate control */
+
+#define MIN_QPSCALE     0.21249999999999999 // equals 0.85 * 2^((QP_MIN - 12) / 6), the x265_qp2qScale formula
+#define MAX_MAX_QPSCALE 615.46574234477100 // equals 0.85 * 2^((QP_MAX_MAX - 12) / 6), the x265_qp2qScale formula
+
+#define BITS_FOR_POC 8 // presumably the bit width used for the picture order count - TODO confirm
+
+template<typename T>
+inline T x265_min(T a, T b)
+{
+    // b is returned whenever "a < b" does not hold, ties included
+    if (a < b)
+        return a;
+    return b;
+}
+
+template<typename T>
+inline T x265_max(T a, T b)
+{
+    // b is returned whenever "a > b" does not hold, ties included
+    if (a > b)
+        return a;
+    return b;
+}
+
+template<typename T>
+inline T x265_clip3(T minVal, T maxVal, T a)
+{
+    // raise to the lower bound first, then cap at the upper bound
+    const T raised = x265_max(minVal, a);
+    return x265_min(raised, maxVal);
+}
+
+template<typename T> /* clamp to the pixel range 0 .. (1 << X265_DEPTH) - 1 */
+inline pixel x265_clip(T x)
+{
+    const T lowest = T(0);
+    const T highest = T((1 << X265_DEPTH) - 1);
+    const T raised = x265_max<T>(lowest, x);
+    return (pixel)x265_min<T>(highest, raised);
+}
+
+typedef int16_t  coeff_t;      // transform coefficient
+
+#define X265_MIN(a, b) ((a) < (b) ? (a) : (b)) // macro form: an argument may be evaluated twice
+#define X265_MAX(a, b) ((a) > (b) ? (a) : (b)) // macro form: an argument may be evaluated twice
+#define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y); // x = y when y is smaller; the COPYn forms also copy n - 1 companion pairs
+#define COPY2_IF_LT(x, y, a, b) \
+    if ((y) < (x)) \
+    { \
+        (x) = (y); \
+        (a) = (b); \
+    }
+#define COPY3_IF_LT(x, y, a, b, c, d) \
+    if ((y) < (x)) \
+    { \
+        (x) = (y); \
+        (a) = (b); \
+        (c) = (d); \
+    }
+#define COPY4_IF_LT(x, y, a, b, c, d, e, f) \
+    if ((y) < (x)) \
+    { \
+        (x) = (y); \
+        (a) = (b); \
+        (c) = (d); \
+        (e) = (f); \
+    }
+#define X265_MIN3(a, b, c) X265_MIN((a), X265_MIN((b), (c)))
+#define X265_MAX3(a, b, c) X265_MAX((a), X265_MAX((b), (c)))
+#define X265_MIN4(a, b, c, d) X265_MIN((a), X265_MIN3((b), (c), (d)))
+#define X265_MAX4(a, b, c, d) X265_MAX((a), X265_MAX3((b), (c), (d)))
+#define QP_BD_OFFSET (6 * (X265_DEPTH - 8)) // 0 at 8-bit depth, 6 more for every additional bit
+#define MAX_CHROMA_LAMBDA_OFFSET 36
+
+// arbitrary, but low because SATD scores are 1/4 normal
+#define X265_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
+#define X265_LOOKAHEAD_MAX 250
+
+// Use the same size blocks as x264.  Using larger blocks seems to give artificially
+// high cost estimates (intra and inter both suffer)
+#define X265_LOWRES_CU_SIZE   8
+#define X265_LOWRES_CU_BITS   3 // log2 of X265_LOWRES_CU_SIZE
+
+#define X265_MALLOC(type, count)    (type*)x265_malloc(sizeof(type) * (count)) // typed array allocation via x265_malloc; the result may be NULL
+#define X265_FREE(ptr)              x265_free(ptr) // counterpart of X265_MALLOC
+/* Allocate count elements of type into var through x265_malloc().  On
+ * failure the requested size is logged and control jumps to a label named
+ * "fail", which the calling function has to provide.  The size is a size_t,
+ * so it is printed through an explicit unsigned long instead of %d. */
+#define CHECKED_MALLOC(var, type, count) \
+    { \
+        var = (type*)x265_malloc(sizeof(type) * (count)); \
+        if (!var) \
+        { \
+            x265_log(NULL, X265_LOG_ERROR, "malloc of size %lu failed\n", (unsigned long)(sizeof(type) * (count))); \
+            goto fail; \
+        } \
+    }
+#define CHECKED_MALLOC_ZERO(var, type, count) \
+    { \
+        var = (type*)x265_malloc(sizeof(type) * (count)); \
+        if (var) \
+            memset((void*)var, 0, sizeof(type) * (count)); \
+        else \
+        { \
+            x265_log(NULL, X265_LOG_ERROR, "malloc of size %d failed\n", sizeof(type) * (count)); \
+            goto fail; \
+        } \
+    }
+
+#if defined(_MSC_VER) // derive log2 from the natural log: log2(x) = ln(x) * (1 / ln 2), 1 / ln 2 = 1.442695...
+#define X265_LOG2F(x) (logf((float)(x)) * 1.44269504088896405f)
+#define X265_LOG2(x) (log((double)(x)) * 1.4426950408889640513713538072172)
+#else // other compilers: call the C library log2f()/log2() directly
+#define X265_LOG2F(x) log2f(x)
+#define X265_LOG2(x)  log2(x)
+#endif
+
+#define NUM_CU_DEPTH            4                           // maximum number of CU depths
+#define NUM_FULL_DEPTH          5                           // maximum number of full depths
+#define MIN_LOG2_CU_SIZE        3                           // log2(minCUSize)
+#define MAX_LOG2_CU_SIZE        6                           // log2(maxCUSize)
+#define MIN_CU_SIZE             (1 << MIN_LOG2_CU_SIZE)     // minimum allowable size of CU
+#define MAX_CU_SIZE             (1 << MAX_LOG2_CU_SIZE)     // maximum allowable size of CU
+
+#define LOG2_UNIT_SIZE          2                           // log2(unitSize)
+#define UNIT_SIZE               (1 << LOG2_UNIT_SIZE)       // unit size of CU partition
+
+#define MAX_NUM_PARTITIONS      256                         // (MAX_CU_SIZE / UNIT_SIZE)^2 = 16 * 16
+#define NUM_4x4_PARTITIONS      (1U << (g_unitSizeDepth << 1)) // number of 4x4 units in max CU size; not a constant, reads the global g_unitSizeDepth
+
+#define MIN_PU_SIZE             4
+#define MIN_TU_SIZE             4
+#define MAX_NUM_SPU_W           (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
+
+#define MAX_LOG2_TR_SIZE 5
+#define MAX_LOG2_TS_SIZE 2 // TODO: RExt
+#define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
+#define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
+
+#define COEF_REMAIN_BIN_REDUCTION   3 // indicates the level at which the VLC
+                                      // transitions from Golomb-Rice to TU+EG(k)
+
+#define SBH_THRESHOLD               4 // fixed sign bit hiding controlling threshold
+
+#define C1FLAG_NUMBER               8 // maximum number of largerThan1 flag coded in one chunk:  16 in HM5
+#define C2FLAG_NUMBER               1 // maximum number of largerThan2 flag coded in one chunk:  16 in HM5
+
+#define SAO_ENCODING_RATE           0.75
+#define SAO_ENCODING_RATE_CHROMA    0.5
+
+#define MLS_GRP_NUM                 64 // Max number of coefficient groups, max(16, 64)
+#define MLS_CG_SIZE                 4  // Coefficient group size of 4x4
+#define MLS_CG_BLK_SIZE             (MLS_CG_SIZE * MLS_CG_SIZE)
+#define MLS_CG_LOG2_SIZE            2
+
+#define QUANT_IQUANT_SHIFT          20 // Q(QP%6) * IQ(QP%6) = 2^20
+#define QUANT_SHIFT                 14 // Q(4) = 2^14
+#define SCALE_BITS                  15 // Inherited from TMuC, presumably for fractional bit estimates in RDOQ
+#define MAX_TR_DYNAMIC_RANGE        15 // Maximum transform dynamic range (excluding sign bit)
+
+#define SHIFT_INV_1ST               7  // Shift after first inverse transform stage
+#define SHIFT_INV_2ND               12 // Shift after second inverse transform stage
+
+#define AMVP_DECIMATION_FACTOR      4
+
+#define SCAN_SET_SIZE               16
+#define LOG2_SCAN_SET_SIZE          4
+
+#define ALL_IDX                     -1
+#define PLANAR_IDX                  0
+#define VER_IDX                     26 // index for intra VERTICAL   mode
+#define HOR_IDX                     10 // index for intra HORIZONTAL mode
+#define DC_IDX                      1  // index for intra DC mode
+#define NUM_CHROMA_MODE             5  // total number of chroma modes
+#define DM_CHROMA_IDX               36 // chroma mode index for derived from luma intra mode
+
+#define MDCS_ANGLE_LIMIT            4 // distance from true angle that horiz or vertical scan is allowed
+#define MDCS_LOG2_MAX_SIZE          3 // TUs with log2 of size greater than this can only use diagonal scan
+
+#define MAX_NUM_REF_PICS            16 // max. number of pictures used for reference
+#define MAX_NUM_REF                 16 // max. number of entries in picture reference list
+
+#define REF_NOT_VALID               -1
+
+#define AMVP_NUM_CANDS              2 // number of AMVP candidates
+#define MRG_MAX_NUM_CANDS           5 // max number of final merge candidates
+
+// CHROMA_H_SHIFT / CHROMA_V_SHIFT evaluate to 1 for the listed colour spaces
+// and 0 otherwise (presumably the log2 chroma subsampling factor in each
+// direction -- confirm against callers). Arguments and replacement lists are
+// fully parenthesized so the macros stay correct inside larger expressions,
+// e.g. CHROMA_V_SHIFT(a | b) or n / X265_MAX_PRED_MODE_PER_CTU.
+#define CHROMA_H_SHIFT(x) ((x) == X265_CSP_I420 || (x) == X265_CSP_I422)
+#define CHROMA_V_SHIFT(x) ((x) == X265_CSP_I420)
+#define X265_MAX_PRED_MODE_PER_CTU (85 * 2 * 8)
+
+#define MAX_NUM_TR_COEFFS           (MAX_TR_SIZE * MAX_TR_SIZE) // Maximum number of transform coefficients, for a 32x32 transform
+#define MAX_NUM_TR_CATEGORIES       16                        // 32, 16, 8, 4 transform categories each for luma and chroma
+
+namespace X265_NS {
+
+enum { SAO_NUM_OFFSET = 4 }; // length of SaoCtuParam::offset
+
+enum SaoMergeMode // type of SaoCtuParam::mergeMode
+{
+    SAO_MERGE_NONE, // value selected by SaoCtuParam::reset()
+    SAO_MERGE_LEFT, // presumably: reuse the SAO parameters of the CTU to the left -- TODO confirm
+    SAO_MERGE_UP // presumably: reuse the SAO parameters of the CTU above -- TODO confirm
+};
+
+/* One set of SAO parameters: merge mode, type index, band position and
+ * SAO_NUM_OFFSET offsets. */
+struct SaoCtuParam
+{
+    SaoMergeMode mergeMode;
+    int  typeIdx;
+    uint32_t bandPos;    // BO band position
+    int  offset[SAO_NUM_OFFSET];
+
+    /* Restore the defaults: no merge, typeIdx -1, band position 0 and every
+     * offset zero. */
+    void reset()
+    {
+        for (int i = 0; i < SAO_NUM_OFFSET; i++)
+            offset[i] = 0;
+
+        bandPos   = 0;
+        typeIdx   = -1;
+        mergeMode = SAO_MERGE_NONE;
+    }
+};
+
+/* SAO state holder: three SaoCtuParam arrays (presumably one per colour
+ * plane -- confirm against the allocator), two enable flags and numCuInWidth
+ * (presumably the number of CUs per picture row -- confirm).
+ *
+ * The destructor delete[]s every ctuParam entry, so each entry must be NULL
+ * or point at storage obtained from new[]. No copy constructor or assignment
+ * operator is declared: copying an instance would leave two objects
+ * delete[]-ing the same arrays, so pass it by pointer or reference. */
+struct SAOParam
+{
+    SaoCtuParam* ctuParam[3];
+    bool         bSaoFlag[2];
+    int          numCuInWidth;
+
+    /* Start from a fully defined state: NULL arrays, flags off, zero width.
+     * (bSaoFlag and numCuInWidth used to be left indeterminate.) */
+    SAOParam()
+        : numCuInWidth(0)
+    {
+        for (int i = 0; i < 3; i++)
+            ctuParam[i] = NULL;
+
+        bSaoFlag[0] = false;
+        bSaoFlag[1] = false;
+    }
+
+    /* Releases all three arrays; delete[] on a NULL entry is a no-op. */
+    ~SAOParam()
+    {
+        for (int i = 0; i < 3; i++)
+            delete[] ctuParam[i];
+    }
+};
+
+/* Stores inter analysis data for a single frame. Plain aggregate of raw pointers: nothing in this struct allocates or frees them */
+struct analysis_inter_data
+{
+    int32_t*    ref;
+    uint8_t*    depth;
+    uint8_t*    modes;
+    uint32_t*   bestMergeCand;
+};
+
+/* Stores intra analysis data for a single frame. This struct needs better packing. Plain aggregate of raw pointers: nothing in this struct allocates or frees them */
+struct analysis_intra_data
+{
+    uint8_t*  depth;
+    uint8_t*  modes;
+    char*     partSizes;
+    uint8_t*  chromaModes;
+};
+
+enum TextType // picture component selector
+{
+    TEXT_LUMA     = 0,  // luma
+    TEXT_CHROMA_U = 1,  // chroma U
+    TEXT_CHROMA_V = 2,  // chroma V
+    MAX_NUM_COMPONENT = 3 // count of the components above, not a component itself
+};
+
+// coefficient scanning type used in ACS
+enum ScanType
+{
+    SCAN_DIAG = 0,     // up-right diagonal scan
+    SCAN_HOR = 1,      // horizontal first scan
+    SCAN_VER = 2,      // vertical first scan
+    NUM_SCAN_TYPE = 3  // count of scan types; used as an array dimension (g_scan8x8, g_scan4x4, g_scanOrder)
+};
+
+enum SignificanceMapContextType // presumably selects the significance-map context set by block size -- TODO confirm against users
+{
+    CONTEXT_TYPE_4x4 = 0,
+    CONTEXT_TYPE_8x8 = 1,
+    CONTEXT_TYPE_NxN = 2,
+    CONTEXT_NUMBER_OF_TYPES = 3 // count of the types above
+};
+
+/* located in pixel.cpp */
+void extendPicBorder(pixel* recon, intptr_t stride, int width, int height, int marginX, int marginY);
+
+/* located in common.cpp */
+int64_t  x265_mdate(void);
+#define  x265_log(param, ...) general_log(param, "x265", __VA_ARGS__) // general_log() with the caller tag fixed to "x265"
+void     general_log(const x265_param* param, const char* caller, int level, const char* fmt, ...); // printf-style: fmt followed by its variadic arguments
+int      x265_exp2fix8(double x);
+
+double   x265_ssim2dB(double ssim);
+double   x265_qScale2qp(double qScale);
+double   x265_qp2qScale(double qp);
+uint32_t x265_picturePlaneSize(int csp, int width, int height, int plane);
+
+void*    x265_malloc(size_t size); // a NULL result is treated as failure by CHECKED_MALLOC; wrapped by X265_MALLOC
+void     x265_free(void *ptr); // wrapped by X265_FREE; presumably the only valid way to release x265_malloc() memory -- confirm
+char*    x265_slurp_file(const char *filename);
+
+/* located in primitives.cpp */
+void     x265_setup_primitives(x265_param* param);
+void     x265_report_simd(x265_param* param);
+}
+
+#include "constants.h"
+
+#endif // ifndef X265_COMMON_H
--- a/x265/source/common/constants.cpp
+++ b/x265/source/common/constants.cpp
@ -0,0 +1,582 @@
+/*****************************************************************************
+* Copyright (C) 2015 x265 project
+*
+* Authors: Steve Borho <steve@borho.org>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#include "common.h"
+#include "constants.h"
+#include "threading.h"
+
+namespace X265_NS {
+
+#if X265_DEPTH == 12
+
+// lambda = pow(2, (double)q / 6 - 2) * (1 << (12 - 8)); 12-bit build: 70 entries for q = 0..69, rounded to 4 decimals
+double x265_lambda_tab[QP_MAX_MAX + 1] =
+{
+    4.0000,    4.4898,    5.0397,    5.6569,     6.3496,
+    7.1272,    8.0000,    8.9797,    10.0794,    11.3137,
+    12.6992,   14.2544,   16.0000,   17.9594,    20.1587,
+    22.6274,   25.3984,   28.5088,   32.0000,    35.9188,
+    40.3175,   45.2548,   50.7968,   57.0175,    64.0000,
+    71.8376,   80.6349,   90.5097,   101.5937,   114.0350,
+    128.0000,  143.6751,  161.2699,  181.0193,   203.1873,
+    228.0701,  256.0000,  287.3503,  322.5398,   362.0387,
+    406.3747,  456.1401,  512.0000,  574.7006,   645.0796,
+    724.0773,  812.7493,  912.2803,  1024.0000,  1149.4011,
+    1290.1592, 1448.1547, 1625.4987, 1824.5606,  2048.0000,
+    2298.8023, 2580.3183, 2896.3094, 3250.9974,  3649.1211,
+    4096.0000, 4597.6045, 5160.6366, 5792.6188,  6501.9947,
+    7298.2423, 8192.0000, 9195.2091, 10321.2732, 11585.2375
+};
+
+// lambda2 = pow(lambda, 2) * scale (0.85); 12-bit build: 70 entries for q = 0..69, lambda being the 12-bit x265_lambda_tab value for the same q
+double x265_lambda2_tab[QP_MAX_MAX + 1] =
+{
+    13.6000,       17.1349,       21.5887,       27.2000,       34.2699,
+    43.1773,       54.4000,       68.5397,       86.3546,       108.8000,
+    137.0794,      172.7092,      217.6000,      274.1588,      345.4185,
+    435.2000,      548.3176,      690.8369,      870.4000,      1096.6353,
+    1381.6739,     1740.8000,     2193.2706,     2763.3478,     3481.6000,
+    4386.5411,     5526.6955,     6963.2000,     8773.0822,     11053.3910,
+    13926.4000,    17546.1645,    22106.7819,    27852.8000,    35092.3291,
+    44213.5641,    55705.6000,    70184.6579,    88427.1282,    111411.2000,
+    140369.3159,   176854.2563,   222822.4000,   280738.6324,   353708.5127,
+    445644.8001,   561477.2648,   707417.0237,   891289.6000,   1122954.5277,
+    1414834.0484,  1782579.2003,  2245909.0566,  2829668.0981,  3565158.4000,
+    4491818.1146,  5659336.1938,  7130316.8013,  8983636.2264,  11318672.3923,
+    14260633.6000, 17967272.4585, 22637344.7751, 28521267.1953, 35934544.9165,
+    45274689.5567, 57042534.4000, 71869089.8338, 90549379.1181, 114085068.8008
+};
+
+#elif X265_DEPTH == 10
+
+// lambda = pow(2, (double)q / 6 - 2) * (1 << (X265_DEPTH - 8)); 10-bit build (scale factor 4): 70 entries for q = 0..69, rounded to 4 decimals
+double x265_lambda_tab[QP_MAX_MAX + 1] =
+{
+    1.0000, 1.1225, 1.2599, 1.4142, 1.5874, 
+    1.7818, 2.0000, 2.2449, 2.5198, 2.8284, 
+    3.1748, 3.5636, 4.0000, 4.4898, 5.0397, 
+    5.6569, 6.3496, 7.1272, 8.0000, 8.9797, 
+    10.0794, 11.3137, 12.6992, 14.2544, 16.0000, 
+    17.9594, 20.1587, 22.6274, 25.3984, 28.5088, 
+    32.0000, 35.9188, 40.3175, 45.2548, 50.7968, 
+    57.0175, 64.0000, 71.8376, 80.6349, 90.5097, 
+    101.5937, 114.0350, 128.0000, 143.6751, 161.2699, 
+    181.0193, 203.1873, 228.0701, 256.0000, 287.3503, 
+    322.5398, 362.0387, 406.3747, 456.1401, 512.0000, 
+    574.7006, 645.0796, 724.0773, 812.7493, 912.2803, 
+    1024.0000, 1149.4011, 1290.1592, 1448.1547, 1625.4987, 
+    1824.5606, 2048.0000, 2298.8023, 2580.3183, 2896.3094,
+};
+
+// lambda2 = pow(lambda, 2) * scale (0.85); 10-bit build: 70 entries for q = 0..69, lambda being the 10-bit x265_lambda_tab value for the same q
+double x265_lambda2_tab[QP_MAX_MAX + 1] =
+{
+    0.8500, 1.0709, 1.3493, 1.7000, 2.1419, 
+    2.6986, 3.4000, 4.2837, 5.3972, 6.8000, 
+    8.5675, 10.7943, 13.6000, 17.1349, 21.5887, 
+    27.2000, 34.2699, 43.1773, 54.4000, 68.5397, 
+    86.3546, 108.8000, 137.0794, 172.7092, 217.6000, 
+    274.1588, 345.4185, 435.2000, 548.3176, 690.8369, 
+    870.4000, 1096.6353, 1381.6739, 1740.8000, 2193.2706, 
+    2763.3478, 3481.6000, 4386.5411, 5526.6955, 6963.2000, 
+    8773.0823, 11053.3910, 13926.4000, 17546.1645, 22106.7820, 
+    27852.8000, 35092.3290, 44213.5640, 55705.6000, 70184.6580, 
+    88427.1280, 111411.2000, 140369.3161, 176854.2561, 222822.4000, 
+    280738.6321, 353708.5122, 445644.8000, 561477.2643, 707417.0243, 
+    891289.6000, 1122954.5286, 1414834.0486, 1782579.2000, 2245909.0572, 
+    2829668.0973, 3565158.4000, 4491818.1144, 5659336.1946, 7130316.8000, 
+};
+
+#else /* !HIGH_BIT_DEPTH */
+
+// lambda = pow(2, (double)q / 6 - 2); branch used when X265_DEPTH is neither 12 nor 10: 70 entries for q = 0..69, rounded to 4 decimals
+double x265_lambda_tab[QP_MAX_MAX + 1] =
+{
+    0.2500, 0.2806, 0.3150, 0.3536, 0.3969,
+    0.4454, 0.5000, 0.5612, 0.6300, 0.7071,
+    0.7937, 0.8909, 1.0000, 1.1225, 1.2599,
+    1.4142, 1.5874, 1.7818, 2.0000, 2.2449,
+    2.5198, 2.8284, 3.1748, 3.5636, 4.0000,
+    4.4898, 5.0397, 5.6569, 6.3496, 7.1272,
+    8.0000, 8.9797, 10.0794, 11.3137, 12.6992,
+    14.2544, 16.0000, 17.9594, 20.1587, 22.6274,
+    25.3984, 28.5088, 32.0000, 35.9188, 40.3175,
+    45.2548, 50.7968, 57.0175, 64.0000, 71.8376,
+    80.6349, 90.5097, 101.5937, 114.0350, 128.0000,
+    143.6751, 161.2699, 181.0193, 203.1873, 228.0701,
+    256.0000, 287.3503, 322.5398, 362.0387, 406.3747,
+    456.1401, 512.0000, 574.7006, 645.0796, 724.0773
+};
+
+// lambda2 = pow(lambda, 2) * scale (0.85); NOTE(review): several entries differ in the last digits from the exact formula (e.g. 27.2004 here vs 27.2000 in the 10-bit table), apparently because they were computed from the rounded lambda values
+double x265_lambda2_tab[QP_MAX_MAX + 1] =
+{
+    0.0531, 0.0669, 0.0843, 0.1063, 0.1339,
+    0.1687, 0.2125, 0.2677, 0.3373, 0.4250,
+    0.5355, 0.6746, 0.8500, 1.0709, 1.3493,
+    1.7000, 2.1419, 2.6986, 3.4000, 4.2837,
+    5.3970, 6.8000, 8.5675, 10.7943, 13.6000,
+    17.1345, 21.5887, 27.2004, 34.2699, 43.1773,
+    54.4000, 68.5397, 86.3551, 108.7998, 137.0792,
+    172.7097, 217.6000, 274.1590, 345.4172, 435.1993,
+    548.3169, 690.8389, 870.4000, 1096.6362, 1381.6757,
+    1740.7974, 2193.2676, 2763.3460, 3481.6000, 4386.5446,
+    5526.6890, 6963.2049, 8773.0879, 11053.3840, 13926.4000,
+    17546.1542, 22106.7835, 27852.7889, 35092.3170, 44213.5749,
+    55705.6000, 70184.6657, 88427.1342, 111411.2172, 140369.3373,
+    176854.2222, 222822.4000, 280738.6627, 353708.5368, 445644.7459
+};
+
+#endif
+
+const uint16_t x265_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = // entry i is 16 * 2^(i/3) truncated to an integer; the last entry (65536) is capped at 65535 to fit uint16_t
+{
+       16,    20,    25,    32,    40,    50,
+       64,    80,   101,   128,   161,   203,
+      256,   322,   406,   512,   645,   812,
+     1024,  1290,  1625,  2048,  2580,  3250,
+     4096,  5160,  6501,  8192, 10321, 13003,
+    16384, 20642, 26007, 32768, 41285, 52015,
+    65535
+};
+
+int      g_ctuSizeConfigured = 0;
+uint32_t g_maxLog2CUSize = MAX_LOG2_CU_SIZE; // mutable globals; the initial values below correspond to the largest CU size
+uint32_t g_maxCUSize     = MAX_CU_SIZE;
+uint32_t g_unitSizeDepth = NUM_CU_DEPTH; // read by the NUM_4x4_PARTITIONS macro
+uint32_t g_maxCUDepth    = NUM_CU_DEPTH - 1;
+uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, }; // all zero until filled at runtime; presumably through initZscanToRaster() -- TODO confirm caller
+uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, }; // all zero until initRasterToZscan() stores the inverse of g_zscanToRaster
+
+const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] = // X offset in pixels (multiples of 4, 0..60) for each of the 256 z-scan indices
+{
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60,
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
+    16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60,
+    32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44,
+    48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60
+};
+
+const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS] = // Y offset in pixels (multiples of 4, 0..60) for each of the 256 z-scan indices
+{
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60,
+    48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60
+};
+
+/* Recursively emits raster indices in z-scan (quadtree) order.
+ *
+ * maxFullDepth  log2 of the grid width; the raster stride is 1 << maxFullDepth
+ * depth         current recursion level; a value above maxFullDepth is a leaf
+ * startVal      raster index of the top-left unit of the current quadrant
+ * curIdx        output cursor, advanced by one for every value written
+ *
+ * Each non-leaf level visits its four quadrants in the order top-left,
+ * top-right, bottom-left, bottom-right. */
+void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx)
+{
+    if (depth > maxFullDepth)
+    {
+        // leaf: record the raster position and move the cursor on
+        *curIdx = startVal;
+        ++curIdx;
+        return;
+    }
+
+    const uint32_t rowPitch = 1 << maxFullDepth;
+    const int quadSize = rowPitch >> depth;
+    const uint32_t childDepth = depth + 1;
+    const uint32_t topRight = startVal + quadSize;
+    const uint32_t bottomLeft = startVal + quadSize * rowPitch;
+    const uint32_t bottomRight = bottomLeft + quadSize;
+
+    initZscanToRaster(maxFullDepth, childDepth, startVal,    curIdx);
+    initZscanToRaster(maxFullDepth, childDepth, topRight,    curIdx);
+    initZscanToRaster(maxFullDepth, childDepth, bottomLeft,  curIdx);
+    initZscanToRaster(maxFullDepth, childDepth, bottomRight, curIdx);
+}
+
+/* Fills g_rasterToZscan as the inverse of g_zscanToRaster over the first
+ * 4^maxFullDepth z-scan indices. */
+void initRasterToZscan(uint32_t maxFullDepth)
+{
+    const uint32_t partitionCount = 1 << (maxFullDepth * 2);
+    uint32_t zscanIdx = 0;
+
+    while (zscanIdx < partitionCount)
+    {
+        const uint32_t rasterIdx = g_zscanToRaster[zscanIdx];
+        g_rasterToZscan[rasterIdx] = zscanIdx;
+        zscanIdx++;
+    }
+}
+
+const int16_t g_lumaFilter[4][NTAPS_LUMA] = // four rows of 8 taps; every row sums to 64 and row 0 passes the sample through (single tap of 64)
+{
+    {  0, 0,   0, 64,  0,   0, 0,  0 },
+    { -1, 4, -10, 58, 17,  -5, 1,  0 },
+    { -1, 4, -11, 40, 40, -11, 4, -1 },
+    {  0, 1,  -5, 17, 58, -10, 4, -1 }
+};
+
+const int16_t g_chromaFilter[8][NTAPS_CHROMA] = // eight rows of 4 taps; every row sums to 64 and row 0 passes the sample through (single tap of 64)
+{
+    {  0, 64,  0,  0 },
+    { -2, 58, 10, -2 },
+    { -4, 54, 16, -2 },
+    { -6, 46, 28, -4 },
+    { -4, 36, 36, -4 },
+    { -4, 28, 46, -6 },
+    { -2, 16, 54, -4 },
+    { -2, 10, 58, -2 }
+};
+
+const int16_t g_t4[4][4] = // 4x4 integer transform matrix, one basis vector per row (row 0 is constant 64); presumably the HEVC core transform -- confirm
+{
+    { 64, 64, 64, 64 },
+    { 83, 36, -36, -83 },
+    { 64, -64, -64, 64 },
+    { 36, -83, 83, -36 }
+};
+
+const int16_t g_t8[8][8] = // 8x8 integer transform matrix, one basis vector per row (row 0 is constant 64); presumably the HEVC core transform -- confirm
+{
+    { 64, 64, 64, 64, 64, 64, 64, 64 },
+    { 89, 75, 50, 18, -18, -50, -75, -89 },
+    { 83, 36, -36, -83, -83, -36, 36, 83 },
+    { 75, -18, -89, -50, 50, 89, 18, -75 },
+    { 64, -64, -64, 64, 64, -64, -64, 64 },
+    { 50, -89, 18, 75, -75, -18, 89, -50 },
+    { 36, -83, 83, -36, -36, 83, -83, 36 },
+    { 18, -50, 75, -89, 89, -75, 50, -18 }
+};
+
+const int16_t g_t16[16][16] = // 16x16 integer transform matrix, one basis vector per row (row 0 is constant 64); presumably the HEVC core transform -- confirm
+{
+    { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+    { 90, 87, 80, 70, 57, 43, 25,  9, -9, -25, -43, -57, -70, -80, -87, -90 },
+    { 89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89 },
+    { 87, 57,  9, -43, -80, -90, -70, -25, 25, 70, 90, 80, 43, -9, -57, -87 },
+    { 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83 },
+    { 80,  9, -70, -87, -25, 57, 90, 43, -43, -90, -57, 25, 87, 70, -9, -80 },
+    { 75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75 },
+    { 70, -43, -87,  9, 90, 25, -80, -57, 57, 80, -25, -90, -9, 87, 43, -70 },
+    { 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64 },
+    { 57, -80, -25, 90, -9, -87, 43, 70, -70, -43, 87,  9, -90, 25, 80, -57 },
+    { 50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50 },
+    { 43, -90, 57, 25, -87, 70,  9, -80, 80, -9, -70, 87, -25, -57, 90, -43 },
+    { 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36 },
+    { 25, -70, 90, -80, 43,  9, -57, 87, -87, 57, -9, -43, 80, -90, 70, -25 },
+    { 18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18 },
+    {  9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9 }
+};
+
+const int16_t g_t32[32][32] = // 32x32 integer transform matrix, one basis vector per row (row 0 is constant 64); presumably the HEVC core transform -- confirm
+{
+    { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+    { 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13,  4, -4, -13, -22, -31, -38, -46, -54, -61, -67, -73, -78, -82, -85, -88, -90, -90 },
+    { 90, 87, 80, 70, 57, 43, 25,  9, -9, -25, -43, -57, -70, -80, -87, -90, -90, -87, -80, -70, -57, -43, -25, -9,  9, 25, 43, 57, 70, 80, 87, 90 },
+    { 90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13, 13, 38, 61, 78, 88, 90, 85, 73, 54, 31,  4, -22, -46, -67, -82, -90 },
+    { 89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89, 89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89 },
+    { 88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22, -22, -61, -85, -90, -73, -38,  4, 46, 78, 90, 82, 54, 13, -31, -67, -88 },
+    { 87, 57,  9, -43, -80, -90, -70, -25, 25, 70, 90, 80, 43, -9, -57, -87, -87, -57, -9, 43, 80, 90, 70, 25, -25, -70, -90, -80, -43,  9, 57, 87 },
+    { 85, 46, -13, -67, -90, -73, -22, 38, 82, 88, 54, -4, -61, -90, -78, -31, 31, 78, 90, 61,  4, -54, -88, -82, -38, 22, 73, 90, 67, 13, -46, -85 },
+    { 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83 },
+    { 82, 22, -54, -90, -61, 13, 78, 85, 31, -46, -90, -67,  4, 73, 88, 38, -38, -88, -73, -4, 67, 90, 46, -31, -85, -78, -13, 61, 90, 54, -22, -82 },
+    { 80,  9, -70, -87, -25, 57, 90, 43, -43, -90, -57, 25, 87, 70, -9, -80, -80, -9, 70, 87, 25, -57, -90, -43, 43, 90, 57, -25, -87, -70,  9, 80 },
+    { 78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46, 46, 90, 38, -54, -90, -31, 61, 88, 22, -67, -85, -13, 73, 82,  4, -78 },
+    { 75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75, 75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75 },
+    { 73, -31, -90, -22, 78, 67, -38, -90, -13, 82, 61, -46, -88, -4, 85, 54, -54, -85,  4, 88, 46, -61, -82, 13, 90, 38, -67, -78, 22, 90, 31, -73 },
+    { 70, -43, -87,  9, 90, 25, -80, -57, 57, 80, -25, -90, -9, 87, 43, -70, -70, 43, 87, -9, -90, -25, 80, 57, -57, -80, 25, 90,  9, -87, -43, 70 },
+    { 67, -54, -78, 38, 85, -22, -90,  4, 90, 13, -88, -31, 82, 46, -73, -61, 61, 73, -46, -82, 31, 88, -13, -90, -4, 90, 22, -85, -38, 78, 54, -67 },
+    { 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64 },
+    { 61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67, -67, -54, 78, 38, -85, -22, 90,  4, -90, 13, 88, -31, -82, 46, 73, -61 },
+    { 57, -80, -25, 90, -9, -87, 43, 70, -70, -43, 87,  9, -90, 25, 80, -57, -57, 80, 25, -90,  9, 87, -43, -70, 70, 43, -87, -9, 90, -25, -80, 57 },
+    { 54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73, 73, 31, -90, 22, 78, -67, -38, 90, -13, -82, 61, 46, -88,  4, 85, -54 },
+    { 50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50, 50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50 },
+    { 46, -90, 38, 54, -90, 31, 61, -88, 22, 67, -85, 13, 73, -82,  4, 78, -78, -4, 82, -73, -13, 85, -67, -22, 88, -61, -31, 90, -54, -38, 90, -46 },
+    { 43, -90, 57, 25, -87, 70,  9, -80, 80, -9, -70, 87, -25, -57, 90, -43, -43, 90, -57, -25, 87, -70, -9, 80, -80,  9, 70, -87, 25, 57, -90, 43 },
+    { 38, -88, 73, -4, -67, 90, -46, -31, 85, -78, 13, 61, -90, 54, 22, -82, 82, -22, -54, 90, -61, -13, 78, -85, 31, 46, -90, 67,  4, -73, 88, -38 },
+    { 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36 },
+    { 31, -78, 90, -61,  4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85, -85, 46, 13, -67, 90, -73, 22, 38, -82, 88, -54, -4, 61, -90, 78, -31 },
+    { 25, -70, 90, -80, 43,  9, -57, 87, -87, 57, -9, -43, 80, -90, 70, -25, -25, 70, -90, 80, -43, -9, 57, -87, 87, -57,  9, 43, -80, 90, -70, 25 },
+    { 22, -61, 85, -90, 73, -38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88, 88, -67, 31, 13, -54, 82, -90, 78, -46,  4, 38, -73, 90, -85, 61, -22 },
+    { 18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18, 18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18 },
+    { 13, -38, 61, -78, 88, -90, 85, -73, 54, -31,  4, 22, -46, 67, -82, 90, -90, 82, -67, 46, -22, -4, 31, -54, 73, -85, 90, -88, 78, -61, 38, -13 },
+    {  9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9, -9, 25, -43, 57, -70, 80, -87, 90, -90, 87, -80, 70, -57, 43, -25,  9 },
+    {  4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90, 90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 }
+};
+
+const uint8_t g_chromaScale[ChromaQPMappingTableSize] = // 70 entries: identity for indices 0..29, compressed for 30..43, index - 6 for 44..57, then held at 51
+{
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+    51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51
+};
+
+const uint8_t g_chroma422IntraAngleMappingTable[AngleMapping422TableSize] = // 36 entries (indices 0..35); the last one is DM_CHROMA_IDX. Presumably remaps intra mode indices for 4:2:2 chroma -- confirm
+{ 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20, 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31, DM_CHROMA_IDX };
+
+const uint8_t g_log2Size[MAX_CU_SIZE + 1] = // g_log2Size[n] == floor(log2(n)) for n = 1..64; entry 0 is 0
+{
+    0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    6
+};
+
+const uint16_t g_scan2x2[][2*2] = // two 2x2 scans over raster positions 0..3: row 0 visits 0, 2, 1, 3; row 1 is plain raster order
+{
+    { 0, 2, 1, 3 },
+    { 0, 1, 2, 3 },
+};
+
+const uint16_t g_scan8x8[NUM_SCAN_TYPE][8 * 8] = // one 64-entry scan per ScanType (row 0 = SCAN_DIAG, 1 = SCAN_HOR, 2 = SCAN_VER); values are raster positions in an 8x8 block, emitted in groups of 16 that each cover one 4x4 sub-block
+{
+    { 0,   8,  1, 16,  9,  2, 24, 17, 10,  3, 25, 18, 11, 26, 19, 27, 32, 40, 33, 48, 41, 34, 56, 49, 42, 35, 57, 50, 43, 58, 51, 59,
+      4,  12,  5, 20, 13,  6, 28, 21, 14,  7, 29, 22, 15, 30, 23, 31, 36, 44, 37, 52, 45, 38, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63 },
+    { 0,   1,  2,  3,  8,  9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27,  4,  5,  6,  7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31,
+      32, 33, 34, 35, 40, 41, 42, 43, 48, 49, 50, 51, 56, 57, 58, 59, 36, 37, 38, 39, 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63 },
+    { 0,   8, 16, 24,  1,  9, 17, 25,  2, 10, 18, 26,  3, 11, 19, 27, 32, 40, 48, 56, 33, 41, 49, 57, 34, 42, 50, 58, 35, 43, 51, 59,
+      4,  12, 20, 28,  5, 13, 21, 29,  6, 14, 22, 30,  7, 15, 23, 31, 36, 44, 52, 60, 37, 45, 53, 61, 38, 46, 54, 62, 39, 47, 55, 63 }
+};
+
+ALIGN_VAR_16(const uint16_t, g_scan4x4[NUM_SCAN_TYPE + 1][4 * 4]) = // rows 0..2 follow ScanType (diagonal, horizontal, vertical); the extra fourth row is all zeros
+{
+    { 0,  4,  1,  8,  5,  2, 12,  9,  6,  3, 13, 10,  7, 14, 11, 15 },
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+    { 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15 },
+    { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }
+};
+
+const uint16_t g_scan16x16[16 * 16] = // single 256-entry scan of a 16x16 block (raster positions, row pitch 16); each source line below covers one 4x4 sub-block
+{
+    0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 49, 34, 19, 50, 35, 51,
+    64, 80, 65, 96, 81, 66, 112, 97, 82, 67, 113, 98, 83, 114, 99, 115,
+    4, 20, 5, 36, 21, 6, 52, 37, 22, 7, 53, 38, 23, 54, 39, 55,
+    128, 144, 129, 160, 145, 130, 176, 161, 146, 131, 177, 162, 147, 178, 163, 179,
+    68, 84, 69, 100, 85, 70, 116, 101, 86, 71, 117, 102, 87, 118, 103, 119,
+    8, 24, 9, 40, 25, 10, 56, 41, 26, 11, 57, 42, 27, 58, 43, 59,
+    192,208, 193,224,209, 194,240,225,210, 195,241,226,211,242,227,243,
+    132, 148, 133, 164, 149, 134, 180, 165, 150, 135, 181, 166, 151, 182, 167, 183,
+    72, 88, 73, 104, 89, 74, 120, 105, 90, 75, 121, 106, 91, 122, 107, 123,
+    12, 28, 13, 44, 29, 14, 60, 45, 30, 15, 61, 46, 31, 62, 47, 63,
+    196,212, 197,228,213, 198,244,229,214, 199,245,230,215,246,231,247,
+    136, 152, 137, 168, 153, 138, 184, 169, 154, 139, 185, 170, 155, 186, 171, 187,
+    76, 92, 77, 108, 93, 78, 124, 109, 94, 79, 125, 110, 95, 126, 111, 127,
+    200,216,201,232,217,202,248,233,218,203,249,234,219,250,235,251,
+    140, 156, 141, 172, 157, 142, 188, 173, 158, 143, 189, 174, 159, 190, 175, 191,
+    204,220,205,236,221,206,252,237,222,207,253,238,223,254,239,255
+};
+
+const uint16_t g_scan8x8diag[8 * 8] = // 64-entry diagonal scan of an 8x8 grid in raster positions (row pitch 8), walking each anti-diagonal from bottom-left to top-right
+{
+    0,   8,  1, 16,  9,  2, 24, 17,
+    10,  3, 32, 25, 18, 11,  4, 40,
+    33, 26, 19, 12,  5, 48, 41, 34,
+    27, 20, 13,  6, 56, 49, 42, 35,
+    28, 21, 14,  7, 57, 50, 43, 36,
+    29, 22, 15, 58, 51, 44, 37, 30,
+    23, 59, 52, 45, 38, 31, 60, 53,
+    46, 39, 61, 54, 47, 62, 55, 63
+};
+
+const uint16_t g_scan32x32[32 * 32] = // single 1024-entry scan of a 32x32 block (raster positions, row pitch 32); each run of 16 values covers one 4x4 sub-block
+{
+    0,32,1,64,33,2,96,65,34,3,97,66,35,98,67,99,128,160,129,192,161,130,224,193,162,131,225,194,163,226,195,227,
+    4,36,5,68,37,6,100,69,38,7,101,70,39,102,71,103,256,288,257,320,289,258,352,321,290,259,353,322,291,354,323,355,
+    132,164,133,196,165,134,228,197,166,135,229,198,167,230,199,231,8,40,9,72,41,10,104,73,42,11,105,74,43,106,75,107,
+    384,416,385,448,417,386,480,449,418,387,481,450,419,482,451,483,260,292,261,324,293,262,356,325,294,263,357,326,295,358,327,359,
+    136,168,137,200,169,138,232,201,170,139,233,202,171,234,203,235,12,44,13,76,45,14,108,77,46,15,109,78,47,110,79,111,
+    512,544,513,576,545,514,608,577,546,515,609,578,547,610,579,611,388,420,389,452,421,390,484,453,422,391,485,454,423,486,455,487,
+    264,296,265,328,297,266,360,329,298,267,361,330,299,362,331,363,140,172,141,204,173,142,236,205,174,143,237,206,175,238,207,239,
+    16,48,17,80,49,18,112,81,50,19,113,82,51,114,83,115,640,672,641,704,673,642,736,705,674,643,737,706,675,738,707,739,
+    516,548,517,580,549,518,612,581,550,519,613,582,551,614,583,615,392,424,393,456,425,394,488,457,426,395,489,458,427,490,459,491,
+    268,300,269,332,301,270,364,333,302,271,365,334,303,366,335,367,144,176,145,208,177,146,240,209,178,147,241,210,179,242,211,243,
+    20,52,21,84,53,22,116,85,54,23,117,86,55,118,87,119,768,800,769,832,801,770,864,833,802,771,865,834,803,866,835,867,
+    644,676,645,708,677,646,740,709,678,647,741,710,679,742,711,743,520,552,521,584,553,522,616,585,554,523,617,586,555,618,587,619,
+    396,428,397,460,429,398,492,461,430,399,493,462,431,494,463,495,272,304,273,336,305,274,368,337,306,275,369,338,307,370,339,371,
+    148,180,149,212,181,150,244,213,182,151,245,214,183,246,215,247,24,56,25,88,57,26,120,89,58,27,121,90,59,122,91,123,
+    896,928,897,960,929,898,992,961,930,899,993,962,931,994,963,995,772,804,773,836,805,774,868,837,806,775,869,838,807,870,839,871,
+    648,680,649,712,681,650,744,713,682,651,745,714,683,746,715,747,524,556,525,588,557,526,620,589,558,527,621,590,559,622,591,623,
+    400,432,401,464,433,402,496,465,434,403,497,466,435,498,467,499,276,308,277,340,309,278,372,341,310,279,373,342,311,374,343,375,
+    152,184,153,216,185,154,248,217,186,155,249,218,187,250,219,251,28,60,29,92,61,30,124,93,62,31,125,94,63,126,95,127,
+    900,932,901,964,933,902,996,965,934,903,997,966,935,998,967,999,776,808,777,840,809,778,872,841,810,779,873,842,811,874,843,875,
+    652,684,653,716,685,654,748,717,686,655,749,718,687,750,719,751,528,560,529,592,561,530,624,593,562,531,625,594,563,626,595,627,
+    404,436,405,468,437,406,500,469,438,407,501,470,439,502,471,503,280,312,281,344,313,282,376,345,314,283,377,346,315,378,347,379,
+    156,188,157,220,189,158,252,221,190,159,253,222,191,254,223,255,904,936,905,968,937,906,1000,969,938,907,1001,970,939,1002,971,1003,
+    780,812,781,844,813,782,876,845,814,783,877,846,815,878,847,879,656,688,657,720,689,658,752,721,690,659,753,722,691,754,723,755,
+    532,564,533,596,565,534,628,597,566,535,629,598,567,630,599,631,408,440,409,472,441,410,504,473,442,411,505,474,443,506,475,507,
+    284,316,285,348,317,286,380,349,318,287,381,350,319,382,351,383,908,940,909,972,941,910,1004,973,942,911,1005,974,943,1006,975,1007,
+    784,816,785,848,817,786,880,849,818,787,881,850,819,882,851,883,660,692,661,724,693,662,756,725,694,663,757,726,695,758,727,759,
+    536,568,537,600,569,538,632,601,570,539,633,602,571,634,603,635,412,444,413,476,445,414,508,477,446,415,509,478,447,510,479,511,
+    912,944,913,976,945,914,1008,977,946,915,1009,978,947,1010,979,1011,788,820,789,852,821,790,884,853,822,791,885,854,823,886,855,887,
+    664,696,665,728,697,666,760,729,698,667,761,730,699,762,731,763,540,572,541,604,573,542,636,605,574,543,637,606,575,638,607,639,
+    916,948,917,980,949,918,1012,981,950,919,1013,982,951,1014,983,1015,792,824,793,856,825,794,888,857,826,795,889,858,827,890,859,891,
+    668,700,669,732,701,670,764,733,702,671,765,734,703,766,735,767,920,952,921,984,953,922,1016,985,954,923,1017,986,955,1018,987,1019,
+    796,828,797,860,829,798,892,861,830,799,893,862,831,894,863,895,924,956,925,988,957,926,1020,989,958,927,1021,990,959,1022,991,1023
+};
+
+// Scan table lookup, indexed [scan type][scan size]. The first two columns
+// select the per-scan-type 4x4 and 8x8 tables; the last two columns point at
+// the single 16x16 and 32x32 tables, which are shared by every scan type.
+const uint16_t* const g_scanOrder[NUM_SCAN_TYPE][NUM_SCAN_SIZE] =
+{
+    { g_scan4x4[0], g_scan8x8[0], g_scan16x16, g_scan32x32 },
+    { g_scan4x4[1], g_scan8x8[1], g_scan16x16, g_scan32x32 },
+    { g_scan4x4[2], g_scan8x8[2], g_scan16x16, g_scan32x32 }
+};
+
+// Companion of g_scanOrder with the same [scan type][scan size] indexing.
+// NOTE(review): "CG" presumably stands for coefficient group - confirm.
+// Column 0 reuses the per-type 4x4 scan, column 1 uses a 2x2 scan (types 0 and
+// 2 both use g_scan2x2[0]), column 2 always uses g_scan4x4[0] and column 3
+// always uses g_scan8x8diag.
+const uint16_t* const g_scanOrderCG[NUM_SCAN_TYPE][NUM_SCAN_SIZE] =
+{
+    { g_scan4x4[0], g_scan2x2[0], g_scan4x4[0], g_scan8x8diag },
+    { g_scan4x4[1], g_scan2x2[1], g_scan4x4[0], g_scan8x8diag },
+    { g_scan4x4[2], g_scan2x2[0], g_scan4x4[0], g_scan8x8diag }
+};
+
+// Table used for encoding the last coefficient position; the index is the
+// position (0..31). Each byte packs two fields:
+//   low 4 bits  - number of "1" bins in the prefix
+//   high 4 bits - number of bits in the suffix
+// Example: positions 4 and 5 both map to 0x14, i.e. a prefix of four "1" bins
+// followed by a 1-bit suffix.
+const uint8_t g_lastCoeffTable[32] =
+{
+    0x00, 0x01, 0x02, 0x03, 0x14, 0x14, 0x15, 0x15,
+    0x26, 0x26, 0x26, 0x26, 0x27, 0x27, 0x27, 0x27,
+    0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38,
+    0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
+};
+
+// Maximum value coded with Rice codes for absolute transform levels (this is
+// how the declaration in constants.h describes the table).
+// NOTE(review): presumably indexed by the Rice parameter 0..4 - confirm.
+const uint8_t g_goRiceRange[5] = { 7, 14, 26, 46, 78 };
+
+// CABAC table (declared under "CABAC tables" in constants.h): 64 rows of four
+// 8-bit values. The values decrease from row 0 to row 62 and increase from
+// left to right within a row; row 63 is the constant { 2, 2, 2, 2 }.
+// NOTE(review): presumably the LPS range table indexed
+// [probability state][quantised range] - confirm against the entropy coder.
+const uint8_t g_lpsTable[64][4] =
+{
+    { 128, 176, 208, 240 },
+    { 128, 167, 197, 227 },
+    { 128, 158, 187, 216 },
+    { 123, 150, 178, 205 },
+    { 116, 142, 169, 195 },
+    { 111, 135, 160, 185 },
+    { 105, 128, 152, 175 },
+    { 100, 122, 144, 166 },
+    {  95, 116, 137, 158 },
+    {  90, 110, 130, 150 },
+    {  85, 104, 123, 142 },
+    {  81,  99, 117, 135 },
+    {  77,  94, 111, 128 },
+    {  73,  89, 105, 122 },
+    {  69,  85, 100, 116 },
+    {  66,  80,  95, 110 },
+    {  62,  76,  90, 104 },
+    {  59,  72,  86,  99 },
+    {  56,  69,  81,  94 },
+    {  53,  65,  77,  89 },
+    {  51,  62,  73,  85 },
+    {  48,  59,  69,  80 },
+    {  46,  56,  66,  76 },
+    {  43,  53,  63,  72 },
+    {  41,  50,  59,  69 },
+    {  39,  48,  56,  65 },
+    {  37,  45,  54,  62 },
+    {  35,  43,  51,  59 },
+    {  33,  41,  48,  56 },
+    {  32,  39,  46,  53 },
+    {  30,  37,  43,  50 },
+    {  29,  35,  41,  48 },
+    {  27,  33,  39,  45 },
+    {  26,  31,  37,  43 },
+    {  24,  30,  35,  41 },
+    {  23,  28,  33,  39 },
+    {  22,  27,  32,  37 },
+    {  21,  26,  30,  35 },
+    {  20,  24,  29,  33 },
+    {  19,  23,  27,  31 },
+    {  18,  22,  26,  30 },
+    {  17,  21,  25,  28 },
+    {  16,  20,  23,  27 },
+    {  15,  19,  22,  25 },
+    {  14,  18,  21,  24 },
+    {  14,  17,  20,  23 },
+    {  13,  16,  19,  22 },
+    {  12,  15,  18,  21 },
+    {  12,  14,  17,  20 },
+    {  11,  14,  16,  19 },
+    {  11,  13,  15,  18 },
+    {  10,  12,  15,  17 },
+    {  10,  12,  14,  16 },
+    {   9,  11,  13,  15 },
+    {   9,  11,  12,  14 },
+    {   8,  10,  12,  14 },
+    {   8,   9,  11,  13 },
+    {   7,   9,  11,  12 },
+    {   7,   9,  10,  12 },
+    {   7,   8,  10,  11 },
+    {   6,   8,   9,  11 },
+    {   6,   7,   9,  10 },
+    {   6,   7,   8,   9 },
+    {   2,   2,   2,   2 }
+};
+
+// 64-entry monotonically increasing lookup table, starting at 0 and ending at
+// 250.
+// NOTE(review): the values look like round(256 * (2^(i/64) - 1)), i.e. the
+// fractional part of a fixed-point exp2 (e.g. i=16 -> 48, i=32 -> 106,
+// i=48 -> 175) - confirm against the code that consumes the table.
+const uint8_t x265_exp2_lut[64] =
+{
+    0,  3,  6,  8,  11, 14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
+    48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98,  102,
+    106,  110,  114,  118,  122,  126,  130,  135,  139,  143,  147,  152,  156,  161,  165,  170,
+    175,  179,  184,  189,  194,  198,  203,  208,  214,  219,  224,  229,  234,  240,  245,  250
+};
+
+/* bFilter = g_intraFilterFlags[dir] & trSize
+ *
+ * One flag byte per intra mode. Only bits 0x08, 0x10 and 0x20 are ever set,
+ * so the AND above is non-zero only when trSize has one of those bits set and
+ * the mode's byte has the same bit set. The initialiser is laid out as modes
+ * 0-1, modes 2-17, modes 18-33 and mode 34; modes 1, 10 and 26 are 0x00 and
+ * therefore never select filtering. */
+const uint8_t g_intraFilterFlags[NUM_INTRA_MODE] =
+{
+    0x38, 0x00,
+    0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+    0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+    0x38, 
+};
+
+/* Step sizes used to advance through the shared depth buffer to reach the
+ * next best depth, for the different CTU sizes.
+ * Here depth 0 = 64x64, depth 1 = 32x32, depth 2 = 16x16 and depth 3 = 8x8:
+ *   ctu = 64: depth buffer holds 256 entries, combining depth values 0, 1, 2, 3
+ *   ctu = 32: depth buffer holds  64 entries, combining depth values 1, 2, 3
+ *   ctu = 16: depth buffer holds  16 entries, combining depth values 2, 3
+ * NOTE(review): judging by the magnitudes, the rows below appear to be ordered
+ * ctu = 16, 32, 64 (the reverse of the list above) - confirm against the
+ * code that indexes this table. */
+const uint32_t g_depthInc[3][4] =
+{
+    { 16,  4,  0, 0},
+    { 64, 16,  4, 1},
+    {256, 64, 16, 4}
+};
+
+/* g_depthScanIdx [y][x]
+ *
+ * Maps an (x, y) position in an 8x8 grid to its z-order index (0..63): the
+ * bits of x occupy the even bit positions of the result and the bits of y the
+ * odd ones (e.g. [0][1] = 1, [1][0] = 2, [0][2] = 4, [2][0] = 8). */
+const uint32_t g_depthScanIdx[8][8] =
+{
+    {   0,   1,   4,   5,  16,  17,  20,  21,  },
+    {   2,   3,   6,   7,  18,  19,  22,  23,  },
+    {   8,   9,  12,  13,  24,  25,  28,  29,  },
+    {  10,  11,  14,  15,  26,  27,  30,  31,  },
+    {  32,  33,  36,  37,  48,  49,  52,  53,  },
+    {  34,  35,  38,  39,  50,  51,  54,  55,  },
+    {  40,  41,  44,  45,  56,  57,  60,  61,  },
+    {  42,  43,  46,  47,  58,  59,  62,  63,  }
+};
+
+}
--- a/x265/source/common/constants.h
+++ b/x265/source/common/constants.h
@ -0,0 +1,103 @@
+/*****************************************************************************
+ * Copyright (C) 2015 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_CONSTANTS_H
+#define X265_CONSTANTS_H
+
+#include "common.h"
+
+namespace X265_NS {
+// private namespace
+//
+// Declarations of the global lookup tables and configuration variables shared
+// across the library. This header only declares them; the definitions live in
+// a source file.
+
+extern int g_ctuSizeConfigured;
+
+// Builders for the g_zscanToRaster / g_rasterToZscan tables declared below.
+void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx);
+void initRasterToZscan(uint32_t maxFullDepth);
+
+// Lambda tables, one entry per QP in [0, QP_MAX_MAX]. These two are not const.
+extern double x265_lambda_tab[QP_MAX_MAX + 1];
+extern double x265_lambda2_tab[QP_MAX_MAX + 1];
+extern const uint16_t x265_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET + 1];
+
+// Element counts of the two tables declared right below.
+enum { ChromaQPMappingTableSize = 70 };
+enum { AngleMapping422TableSize = 36 };
+
+extern const uint8_t g_chromaScale[ChromaQPMappingTableSize];
+extern const uint8_t g_chroma422IntraAngleMappingTable[AngleMapping422TableSize];
+
+// flexible conversion from relative to absolute index
+extern uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS];
+extern uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS];
+
+// conversion of partition index to picture pel position
+extern const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS];
+extern const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS];
+extern const uint8_t g_log2Size[MAX_CU_SIZE + 1]; // from size to log2(size)
+
+// global variable (CTU width/height, max. CU depth)
+extern uint32_t g_maxLog2CUSize;
+extern uint32_t g_maxCUSize;
+extern uint32_t g_maxCUDepth;
+extern uint32_t g_unitSizeDepth; // Depth at which 4x4 unit occurs from max CU size
+
+// NOTE(review): presumably the 4x4 .. 32x32 transform coefficient matrices - confirm.
+extern const int16_t g_t4[4][4];
+extern const int16_t g_t8[8][8];
+extern const int16_t g_t16[16][16];
+extern const int16_t g_t32[32][32];
+
+// Subpel interpolation defines and constants
+
+#define NTAPS_LUMA        8                            // Number of taps for luma
+#define NTAPS_CHROMA      4                            // Number of taps for chroma
+#define IF_INTERNAL_PREC 14                            // Number of bits for internal precision
+#define IF_FILTER_PREC    6                            // Log2 of sum of filter taps
+#define IF_INTERNAL_OFFS (1 << (IF_INTERNAL_PREC - 1)) // Offset used internally
+#define SLFASE_CONSTANT  0x5f4e4a53
+
+extern const int16_t g_lumaFilter[4][NTAPS_LUMA];      // Luma filter taps
+extern const int16_t g_chromaFilter[8][NTAPS_CHROMA];  // Chroma filter taps
+
+// Scanning order & context mapping table
+
+#define NUM_SCAN_SIZE 4
+
+extern const uint16_t* const g_scanOrder[NUM_SCAN_TYPE][NUM_SCAN_SIZE];
+extern const uint16_t* const g_scanOrderCG[NUM_SCAN_TYPE][NUM_SCAN_SIZE];
+extern const uint16_t g_scan8x8diag[8 * 8];
+// The extra (+1) row is a safety buffer: the optimized codeCoeffNxN assembly
+// may read up to 15 bytes beyond the last real row.
+extern const uint16_t g_scan4x4[NUM_SCAN_TYPE + 1][4 * 4];
+
+extern const uint8_t g_lastCoeffTable[32];
+extern const uint8_t g_goRiceRange[5]; // maximum value coded with Rice codes
+
+// CABAC tables
+extern const uint8_t g_lpsTable[64][4];
+extern const uint8_t x265_exp2_lut[64];
+
+// Intra tables
+extern const uint8_t g_intraFilterFlags[NUM_INTRA_MODE];
+
+extern const uint32_t g_depthInc[3][4];
+extern const uint32_t g_depthScanIdx[8][8];
+
+}
+
+#endif
--- a/x265/source/common/contexts.h
+++ b/x265/source/common/contexts.h
@ -0,0 +1,311 @@
+/*****************************************************************************
+* Copyright (C) 2015 x265 project
+*
+* Authors: Steve Borho <steve@borho.org>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#ifndef X265_CONTEXTS_H
+#define X265_CONTEXTS_H
+
+#include "common.h"
+
+// Number of context models reserved for each syntax element. These counts
+// size the INIT_* tables and determine the OFF_* offsets defined in this header.
+#define NUM_SPLIT_FLAG_CTX          3   // number of context models for split flag
+#define NUM_SKIP_FLAG_CTX           3   // number of context models for skip flag
+
+#define NUM_MERGE_FLAG_EXT_CTX      1   // number of context models for merge flag of merge extended
+#define NUM_MERGE_IDX_EXT_CTX       1   // number of context models for merge index of merge extended
+
+#define NUM_PART_SIZE_CTX           4   // number of context models for partition size
+#define NUM_PRED_MODE_CTX           1   // number of context models for prediction mode
+
+#define NUM_ADI_CTX                 1   // number of context models for intra prediction
+
+#define NUM_CHROMA_PRED_CTX         2   // number of context models for intra prediction (chroma)
+#define NUM_INTER_DIR_CTX           5   // number of context models for inter prediction direction
+#define NUM_MV_RES_CTX              2   // number of context models for motion vector difference
+
+#define NUM_REF_NO_CTX              2   // number of context models for reference index
+#define NUM_TRANS_SUBDIV_FLAG_CTX   3   // number of context models for transform subdivision flags
+#define NUM_QT_CBF_CTX              7   // number of context models for QT CBF
+#define NUM_QT_ROOT_CBF_CTX         1   // number of context models for QT ROOT CBF
+#define NUM_DELTA_QP_CTX            3   // number of context models for dQP
+
+#define NUM_SIG_CG_FLAG_CTX         2   // number of context models for MULTI_LEVEL_SIGNIFICANCE
+
+// The combined counts below equal the sum of their _LUMA and _CHROMA parts.
+#define NUM_SIG_FLAG_CTX            42  // number of context models for sig flag
+#define NUM_SIG_FLAG_CTX_LUMA       27  // number of context models for luma sig flag
+#define NUM_SIG_FLAG_CTX_CHROMA     15  // number of context models for chroma sig flag
+
+#define NUM_CTX_LAST_FLAG_XY        18  // number of context models for last coefficient position
+#define NUM_CTX_LAST_FLAG_XY_LUMA   15  // number of context models for last coefficient position of luma
+#define NUM_CTX_LAST_FLAG_XY_CHROMA 3   // number of context models for last coefficient position of chroma
+
+#define NUM_ONE_FLAG_CTX            24  // number of context models for greater than 1 flag
+#define NUM_ONE_FLAG_CTX_LUMA       16  // number of context models for greater than 1 flag of luma
+#define NUM_ONE_FLAG_CTX_CHROMA     8   // number of context models for greater than 1 flag of chroma
+#define NUM_ABS_FLAG_CTX            6   // number of context models for greater than 2 flag
+#define NUM_ABS_FLAG_CTX_LUMA       4   // number of context models for greater than 2 flag of luma
+#define NUM_ABS_FLAG_CTX_CHROMA     2   // number of context models for greater than 2 flag of chroma
+
+#define NUM_MVP_IDX_CTX             1   // number of context models for MVP index
+
+#define NUM_SAO_MERGE_FLAG_CTX      1   // number of context models for SAO merge flags
+#define NUM_SAO_TYPE_IDX_CTX        1   // number of context models for SAO type index
+
+#define NUM_TRANSFORMSKIP_FLAG_CTX  1   // number of context models for transform skipping
+#define NUM_TQUANT_BYPASS_FLAG_CTX  1   // number of context models for cu_transquant_bypass flag
+#define CNU                         154 // dummy initialization value for unused context models 'Context model Not Used'
+
+// Offset for context
+//
+// Each OFF_* value is the previous group's offset plus that group's context
+// count, so the groups are laid out back to back starting at 0 and
+// MAX_OFF_CTX_MOD is the total number of context slots. Two groups reserve
+// twice their NUM_* count (SIG_CG_FLAG and TRANSFORMSKIP_FLAG), matching the
+// 2 * NUM_* sized INIT_SIG_CG_FLAG and INIT_TRANSFORMSKIP_FLAG tables in this
+// header. The chaining order here is independent of the order in which the
+// NUM_* macros are listed.
+#define OFF_SPLIT_FLAG_CTX         (0)
+#define OFF_SKIP_FLAG_CTX          (OFF_SPLIT_FLAG_CTX         +     NUM_SPLIT_FLAG_CTX)
+#define OFF_MERGE_FLAG_EXT_CTX     (OFF_SKIP_FLAG_CTX          +     NUM_SKIP_FLAG_CTX)
+#define OFF_MERGE_IDX_EXT_CTX      (OFF_MERGE_FLAG_EXT_CTX     +     NUM_MERGE_FLAG_EXT_CTX)
+#define OFF_PART_SIZE_CTX          (OFF_MERGE_IDX_EXT_CTX      +     NUM_MERGE_IDX_EXT_CTX)
+#define OFF_PRED_MODE_CTX          (OFF_PART_SIZE_CTX          +     NUM_PART_SIZE_CTX)
+#define OFF_ADI_CTX                (OFF_PRED_MODE_CTX          +     NUM_PRED_MODE_CTX)
+#define OFF_CHROMA_PRED_CTX        (OFF_ADI_CTX                +     NUM_ADI_CTX)
+#define OFF_DELTA_QP_CTX           (OFF_CHROMA_PRED_CTX        +     NUM_CHROMA_PRED_CTX)
+#define OFF_INTER_DIR_CTX          (OFF_DELTA_QP_CTX           +     NUM_DELTA_QP_CTX)
+#define OFF_REF_NO_CTX             (OFF_INTER_DIR_CTX          +     NUM_INTER_DIR_CTX)
+#define OFF_MV_RES_CTX             (OFF_REF_NO_CTX             +     NUM_REF_NO_CTX)
+#define OFF_QT_CBF_CTX             (OFF_MV_RES_CTX             +     NUM_MV_RES_CTX)
+#define OFF_TRANS_SUBDIV_FLAG_CTX  (OFF_QT_CBF_CTX             +     NUM_QT_CBF_CTX)
+#define OFF_QT_ROOT_CBF_CTX        (OFF_TRANS_SUBDIV_FLAG_CTX  +     NUM_TRANS_SUBDIV_FLAG_CTX)
+#define OFF_SIG_CG_FLAG_CTX        (OFF_QT_ROOT_CBF_CTX        +     NUM_QT_ROOT_CBF_CTX)
+#define OFF_SIG_FLAG_CTX           (OFF_SIG_CG_FLAG_CTX        + 2 * NUM_SIG_CG_FLAG_CTX)
+#define OFF_CTX_LAST_FLAG_X        (OFF_SIG_FLAG_CTX           +     NUM_SIG_FLAG_CTX)
+#define OFF_CTX_LAST_FLAG_Y        (OFF_CTX_LAST_FLAG_X        +     NUM_CTX_LAST_FLAG_XY)
+#define OFF_ONE_FLAG_CTX           (OFF_CTX_LAST_FLAG_Y        +     NUM_CTX_LAST_FLAG_XY)
+#define OFF_ABS_FLAG_CTX           (OFF_ONE_FLAG_CTX           +     NUM_ONE_FLAG_CTX)
+#define OFF_MVP_IDX_CTX            (OFF_ABS_FLAG_CTX           +     NUM_ABS_FLAG_CTX)
+#define OFF_SAO_MERGE_FLAG_CTX     (OFF_MVP_IDX_CTX            +     NUM_MVP_IDX_CTX)
+#define OFF_SAO_TYPE_IDX_CTX       (OFF_SAO_MERGE_FLAG_CTX     +     NUM_SAO_MERGE_FLAG_CTX)
+#define OFF_TRANSFORMSKIP_FLAG_CTX (OFF_SAO_TYPE_IDX_CTX       +     NUM_SAO_TYPE_IDX_CTX)
+#define OFF_TQUANT_BYPASS_FLAG_CTX (OFF_TRANSFORMSKIP_FLAG_CTX + 2 * NUM_TRANSFORMSKIP_FLAG_CTX)
+#define MAX_OFF_CTX_MOD            (OFF_TQUANT_BYPASS_FLAG_CTX +     NUM_TQUANT_BYPASS_FLAG_CTX)
+
+extern "C" const uint32_t PFX(entropyStateBits)[128];
+
+namespace X265_NS {
+// private namespace
+
+// 128-entry tables indexed by a packed context state (see the sbac* macros).
+extern const uint32_t g_entropyBits[128];
+extern const uint8_t g_nextState[128][2];
+
+// Accessors for a packed context state S: bit 0 is returned by sbacGetMps and
+// the remaining upper bits by sbacGetState. V is a bin value (0 or 1).
+#define sbacGetMps(S)            ((S) & 1)
+#define sbacGetState(S)          ((S) >> 1)
+#define sbacNext(S, V)           (g_nextState[(S)][(V)])
+#define sbacGetEntropyBits(S, V) (g_entropyBits[(S) ^ (V)])
+// Same lookup as sbacGetEntropyBits, but with the state fixed at 126.
+#define sbacGetEntropyBitsTrm(V) (g_entropyBits[126 ^ (V)])
+
+#define MAX_NUM_CHANNEL_TYPE     2
+
+// Small context-selection tables. Being 'static const' in a header, each
+// translation unit that includes this file gets its own copy.
+static const uint32_t ctxCbf[3][5] = { { 1, 0, 0, 0, 0 }, { 2, 3, 4, 5, 6 }, { 2, 3, 4, 5, 6 } };
+static const uint32_t significanceMapContextSetStart[MAX_NUM_CHANNEL_TYPE][3] = { { 0,  9, 21 }, { 0,  9, 12 } };
+static const uint32_t significanceMapContextSetSize[MAX_NUM_CHANNEL_TYPE][3]  = { { 9, 12,  6 }, { 9,  3,  3 } };
+static const uint32_t nonDiagonalScan8x8ContextOffset[MAX_NUM_CHANNEL_TYPE]   = {  6, 0  };
+static const uint32_t notFirstGroupNeighbourhoodContextOffset[MAX_NUM_CHANNEL_TYPE] = { 3, 0 };
+
+// Initial context values. Every INIT_* table below has three rows and one
+// column per context model of the corresponding syntax element.
+// NOTE(review): presumably one row per slice type, with the last row being the
+// intra-only case - it holds CNU ("Context model Not Used") for syntax
+// elements such as skip, merge, MVD and reference index. Confirm against the
+// code that loads these tables.
+
+// initial probability for cu_transquant_bypass flag
+static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
+{
+    { 154 },
+    { 154 },
+    { 154 },
+};
+
+// initial probability for split flag
+static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
+{
+    { 107,  139,  126, },
+    { 107,  139,  126, },
+    { 139,  141,  157, },
+};
+
+static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
+{
+    { 197,  185,  201, },
+    { 197,  185,  201, },
+    { CNU,  CNU,  CNU, },
+};
+
+static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
+{
+    { 154, },
+    { 110, },
+    { CNU, },
+};
+
+static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
+{
+    { 137, },
+    { 122, },
+    { CNU, },
+};
+
+static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
+{
+    { 154,  139,  154, 154 },
+    { 154,  139,  154, 154 },
+    { 184,  CNU,  CNU, CNU },
+};
+
+static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
+{
+    { 134, },
+    { 149, },
+    { CNU, },
+};
+
+static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
+{
+    { 183, },
+    { 154, },
+    { 184, },
+};
+
+static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
+{
+    { 152,  139, },
+    { 152,  139, },
+    {  63,  139, },
+};
+
+static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
+{
+    {  95,   79,   63,   31,  31, },
+    {  95,   79,   63,   31,  31, },
+    { CNU,  CNU,  CNU,  CNU, CNU, },
+};
+
+static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
+{
+    { 169,  198, },
+    { 140,  198, },
+    { CNU,  CNU, },
+};
+
+static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
+{
+    { 153,  153 },
+    { 153,  153 },
+    { CNU,  CNU },
+};
+
+static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
+{
+    { 154,  154,  154, },
+    { 154,  154,  154, },
+    { 154,  154,  154, },
+};
+
+static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
+{
+    { 153,  111,  149,   92,  167,  154,  154 },
+    { 153,  111,  149,  107,  167,  154,  154 },
+    { 111,  141,   94,  138,  182,  154,  154 },
+};
+
+static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
+{
+    {  79, },
+    {  79, },
+    { CNU, },
+};
+
+// Each row lists NUM_CTX_LAST_FLAG_XY_LUMA values on its first line followed
+// by NUM_CTX_LAST_FLAG_XY_CHROMA values on its second line.
+static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
+{
+    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
+      108,  123,   93 },
+    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
+      108,  123,  108 },
+    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
+      108,  123,   63 },
+};
+
+// Twice NUM_SIG_CG_FLAG_CTX entries per row, matching the doubled slot count
+// reserved by OFF_SIG_FLAG_CTX.
+static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
+{
+    { 121,  140,
+      61,  154, },
+    { 121,  140,
+      61,  154, },
+    {  91,  171,
+       134,  141, },
+};
+
+static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
+{
+    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
+    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
+    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
+};
+
+static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
+{
+    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
+    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
+    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
+};
+
+static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
+{
+    { 107,  167,   91,  107,  107,  167, },
+    { 107,  167,   91,  122,  107,  167, },
+    { 138,  153,  136,  167,  152,  152, },
+};
+
+static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
+{
+    { 168 },
+    { 168 },
+    { CNU },
+};
+
+static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
+{
+    { 153,  },
+    { 153,  },
+    { 153,  },
+};
+
+static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
+{
+    { 160, },
+    { 185, },
+    { 200, },
+};
+
+static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
+{
+    { 224,  167,  122, },
+    { 124,  138,   94, },
+    { 153,  138,  138, },
+};
+
+// Twice NUM_TRANSFORMSKIP_FLAG_CTX entries per row, matching the doubled slot
+// count reserved by OFF_TQUANT_BYPASS_FLAG_CTX.
+static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
+{
+    { 139,  139 },
+    { 139,  139 },
+    { 139,  139 },
+};
+}
+
+#endif // ifndef X265_CONTEXTS_H
--- a/x265/source/common/cpu.cpp
+++ b/x265/source/common/cpu.cpp
@ -0,0 +1,374 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Loren Merritt <lorenm@u.washington.edu>
+ *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Fiona Glaser <fiona@x264.com>
+ *          Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "cpu.h"
+#include "common.h"
+
+#if MACOS || SYS_FREEBSD
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+#if SYS_OPENBSD
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#endif
+
+#if X265_ARCH_ARM && !defined(HAVE_NEON)
+#include <signal.h>
+#include <setjmp.h>
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+/* Signal handler that turns an expected signal into a non-local jump.
+ *
+ * When canjump is non-zero the handler clears it and jumps back to the point
+ * saved in jmpbuf with value 1. When canjump is zero the signal was not
+ * expected: the default disposition is restored and the signal is re-raised
+ * so the process gets the default action.
+ * NOTE(review): canjump is presumably set only after sigsetjmp(jmpbuf, ...)
+ * by the probing code, which is not visible in this part of the file.
+ *
+ * sig: number of the signal being handled. */
+static void sigill_handler(int sig)
+{
+    if (!canjump)
+    {
+        signal(sig, SIG_DFL);
+        raise(sig);
+        /* If the signal is blocked while this handler runs, raise() only
+         * marks it pending and returns. Return here so it is delivered with
+         * the default action, instead of falling through to siglongjmp() on
+         * a jump buffer that is not armed. */
+        return;
+    }
+
+    canjump = 0;
+    siglongjmp(jmpbuf, 1);
+}
+
+#endif // if X265_ARCH_ARM
+
+namespace X265_NS {
+/* Table pairing a human-readable CPU capability name with its flag mask.
+ * The list is terminated by an entry with an empty name and a zero mask.
+ *
+ * On x86 the instruction-set entries are cumulative: the local helper macros
+ * MMX2, SSE2 and AVX expand to the OR of all flags up to that level, so each
+ * named level includes the flags of the levels it builds on. The helpers are
+ * unparenthesized, which is safe only because they are combined with '|'
+ * exclusively; they are #undef'd right after use. */
+const cpu_name_t cpu_names[] =
+{
+#if X265_ARCH_X86
+#define MMX2 X265_CPU_MMX | X265_CPU_MMX2 | X265_CPU_CMOV
+    { "MMX2",        MMX2 },
+    { "MMXEXT",      MMX2 },
+    { "SSE",         MMX2 | X265_CPU_SSE },
+#define SSE2 MMX2 | X265_CPU_SSE | X265_CPU_SSE2
+    { "SSE2Slow",    SSE2 | X265_CPU_SSE2_IS_SLOW },
+    { "SSE2",        SSE2 },
+    { "SSE2Fast",    SSE2 | X265_CPU_SSE2_IS_FAST },
+    { "SSE3",        SSE2 | X265_CPU_SSE3 },
+    { "SSSE3",       SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 },
+    { "SSE4.1",      SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 },
+    { "SSE4",        SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 },
+    { "SSE4.2",      SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 | X265_CPU_SSE42 },
+#define AVX SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 | X265_CPU_SSE42 | X265_CPU_AVX
+    { "AVX",         AVX },
+    { "XOP",         AVX | X265_CPU_XOP },
+    { "FMA4",        AVX | X265_CPU_FMA4 },
+    { "AVX2",        AVX | X265_CPU_AVX2 },
+    { "FMA3",        AVX | X265_CPU_FMA3 },
+#undef AVX
+#undef SSE2
+#undef MMX2
+    /* Stand-alone flags that do not imply any instruction-set level
+     * (except BMI2, which also carries BMI1). */
+    { "Cache32",         X265_CPU_CACHELINE_32 },
+    { "Cache64",         X265_CPU_CACHELINE_64 },
+    { "LZCNT",           X265_CPU_LZCNT },
+    { "BMI1",            X265_CPU_BMI1 },
+    { "BMI2",            X265_CPU_BMI1 | X265_CPU_BMI2 },
+    { "SlowCTZ",         X265_CPU_SLOW_CTZ },
+    { "SlowAtom",        X265_CPU_SLOW_ATOM },
+    { "SlowPshufb",      X265_CPU_SLOW_PSHUFB },
+    { "SlowPalignr",     X265_CPU_SLOW_PALIGNR },
+    { "SlowShuffle",     X265_CPU_SLOW_SHUFFLE },
+    { "UnalignedStack",  X265_CPU_STACK_MOD4 },
+
+#elif X265_ARCH_ARM
+    { "ARMv6",           X265_CPU_ARMV6 },
+    { "NEON",            X265_CPU_NEON },
+    { "FastNeonMRC",     X265_CPU_FAST_NEON_MRC },
+#endif // if X265_ARCH_X86
+    { "", 0 },
+};
+
+#if X265_ARCH_X86
+
+extern "C" {
+/* cpu-a.asm */
+int PFX(cpu_cpuid_test)(void);
+void PFX(cpu_cpuid)(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
+void PFX(cpu_xgetbv)(uint32_t op, uint32_t *eax, uint32_t *edx);
+}
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4309) // truncation of constant value
+#endif
+
+uint32_t cpu_detect(void)
+{
+    uint32_t cpu = 0;
+
+    uint32_t eax, ebx, ecx, edx;
+    uint32_t vendor[4] = { 0 };
+    uint32_t max_extended_cap, max_basic_cap;
+
+#if !X86_64
+    if (!PFX(cpu_cpuid_test)())
+        return 0;
+#endif
+
+    PFX(cpu_cpuid)(0, &eax, vendor + 0, vendor + 2, vendor + 1);
+    max_basic_cap = eax;
+    if (max_basic_cap == 0)
+        return 0;
+
+    PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
+    if (edx & 0x00800000)
+        cpu |= X265_CPU_MMX;
+    else
+        return cpu;
+    if (edx & 0x02000000)
+        cpu |= X265_CPU_MMX2 | X265_CPU_SSE;
+    if (edx & 0x00008000)
+        cpu |= X265_CPU_CMOV;
+    else
+        return cpu;
+    if (edx & 0x04000000)
+        cpu |= X265_CPU_SSE2;
+    if (ecx & 0x00000001)
+        cpu |= X265_CPU_SSE3;
+    if (ecx & 0x00000200)
+        cpu |= X265_CPU_SSSE3;
+    if (ecx & 0x00080000)
+        cpu |= X265_CPU_SSE4;
+    if (ecx & 0x00100000)
+        cpu |= X265_CPU_SSE42;
+    /* Check OXSAVE and AVX bits */
+    if ((ecx & 0x18000000) == 0x18000000)
+    {
+        /* Check for OS support */
+        PFX(cpu_xgetbv)(0, &eax, &edx);
+        if ((eax & 0x6) == 0x6)
+        {
+            cpu |= X265_CPU_AVX;
+            if (ecx & 0x00001000)
+                cpu |= X265_CPU_FMA3;
+        }
+    }
+
+    if (max_basic_cap >= 7)
+    {
+        PFX(cpu_cpuid)(7, &eax, &ebx, &ecx, &edx);
+        /* AVX2 requires OS support, but BMI1/2 don't. */
+        if ((cpu & X265_CPU_AVX) && (ebx & 0x00000020))
+            cpu |= X265_CPU_AVX2;
+        if (ebx & 0x00000008)
+        {
+            cpu |= X265_CPU_BMI1;
+            if (ebx & 0x00000100)
+                cpu |= X265_CPU_BMI2;
+        }
+    }
+
+    if (cpu & X265_CPU_SSSE3)
+        cpu |= X265_CPU_SSE2_IS_FAST;
+
+    PFX(cpu_cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
+    max_extended_cap = eax;
+
+    if (max_extended_cap >= 0x80000001)
+    {
+        PFX(cpu_cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
+
+        if (ecx & 0x00000020)
+            cpu |= X265_CPU_LZCNT; /* Supported by Intel chips starting with Haswell */
+        if (ecx & 0x00000040) /* SSE4a, AMD only */
+        {
+            int family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+            cpu |= X265_CPU_SSE2_IS_FAST;      /* Phenom and later CPUs have fast SSE units */
+            if (family == 0x14)
+            {
+                cpu &= ~X265_CPU_SSE2_IS_FAST; /* SSSE3 doesn't imply fast SSE anymore... */
+                cpu |= X265_CPU_SSE2_IS_SLOW;  /* Bobcat has 64-bit SIMD units */
+                cpu |= X265_CPU_SLOW_PALIGNR;  /* palignr is insanely slow on Bobcat */
+            }
+            if (family == 0x16)
+            {
+                cpu |= X265_CPU_SLOW_PSHUFB;   /* Jaguar's pshufb isn't that slow, but it's slow enough
+                                                * compared to alternate instruction sequences that this
+                                                * is equal or faster on almost all such functions. */
+            }
+        }
+
+        if (cpu & X265_CPU_AVX)
+        {
+            if (ecx & 0x00000800) /* XOP */
+                cpu |= X265_CPU_XOP;
+            if (ecx & 0x00010000) /* FMA4 */
+                cpu |= X265_CPU_FMA4;
+        }
+
+        if (!strcmp((char*)vendor, "AuthenticAMD"))
+        {
+            if (edx & 0x00400000)
+                cpu |= X265_CPU_MMX2;
+            if (!(cpu & X265_CPU_LZCNT))
+                cpu |= X265_CPU_SLOW_CTZ;
+            if ((cpu & X265_CPU_SSE2) && !(cpu & X265_CPU_SSE2_IS_FAST))
+                cpu |= X265_CPU_SSE2_IS_SLOW; /* AMD CPUs come in two types: terrible at SSE and great at it */
+        }
+    }
+
+    if (!strcmp((char*)vendor, "GenuineIntel"))
+    {
+        PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
+        int family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+        int model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
+        if (family == 6)
+        {
+            /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
+             * theoretically support sse2, but it's significantly slower than mmx for
+             * almost all of x264's functions, so let's just pretend they don't. */
+            if (model == 9 || model == 13 || model == 14)
+            {
+                cpu &= ~(X265_CPU_SSE2 | X265_CPU_SSE3);
+                X265_CHECK(!(cpu & (X265_CPU_SSSE3 | X265_CPU_SSE4)), "unexpected CPU ID %d\n", cpu);
+            }
+            /* Detect Atom CPU */
+            else if (model == 28)
+            {
+                cpu |= X265_CPU_SLOW_ATOM;
+                cpu |= X265_CPU_SLOW_CTZ;
+                cpu |= X265_CPU_SLOW_PSHUFB;
+            }
+
+            /* Conroe has a slow shuffle unit. Check the model number to make sure not
+             * to include crippled low-end Penryns and Nehalems that don't have SSE4. */
+            else if ((cpu & X265_CPU_SSSE3) && !(cpu & X265_CPU_SSE4) && model < 23)
+                cpu |= X265_CPU_SLOW_SHUFFLE;
+        }
+    }
+
+    if ((!strcmp((char*)vendor, "GenuineIntel") || !strcmp((char*)vendor, "CyrixInstead")) && !(cpu & X265_CPU_SSE42))
+    {
+        /* cacheline size is specified in 3 places, any of which may be missing */
+        PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
+        int cache = (ebx & 0xff00) >> 5; // cflush size
+        if (!cache && max_extended_cap >= 0x80000006)
+        {
+            PFX(cpu_cpuid)(0x80000006, &eax, &ebx, &ecx, &edx);
+            cache = ecx & 0xff; // cacheline size
+        }
+        if (!cache && max_basic_cap >= 2)
+        {
+            // Cache and TLB Information
+            static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
+            static const char cache64_ids[] = { 0x22, 0x23, 0x25, 0x29, 0x2c, 0x46, 0x47, 0x49, 0x60, 0x66, 0x67,
+                                                0x68, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7c, 0x7f, 0x86, 0x87, 0 };
+            uint32_t buf[4];
+            int max, i = 0;
+            do
+            {
+                PFX(cpu_cpuid)(2, buf + 0, buf + 1, buf + 2, buf + 3);
+                max = buf[0] & 0xff;
+                buf[0] &= ~0xff;
+                for (int j = 0; j < 4; j++)
+                {
+                    if (!(buf[j] >> 31))
+                        while (buf[j])
+                        {
+                            if (strchr(cache32_ids, buf[j] & 0xff))
+                                cache = 32;
+                            if (strchr(cache64_ids, buf[j] & 0xff))
+                                cache = 64;
+                            buf[j] >>= 8;
+                        }
+                }
+            }
+            while (++i < max);
+        }
+
+        if (cache == 32)
+            cpu |= X265_CPU_CACHELINE_32;
+        else if (cache == 64)
+            cpu |= X265_CPU_CACHELINE_64;
+        else
+            x265_log(NULL, X265_LOG_WARNING, "unable to determine cacheline size\n");
+    }
+
+#if BROKEN_STACK_ALIGNMENT
+    cpu |= X265_CPU_STACK_MOD4;
+#endif
+
+    return cpu;
+}
+
+#elif X265_ARCH_ARM
+
+extern "C" {
+void PFX(cpu_neon_test)(void);
+int PFX(cpu_fast_neon_mrc_test)(void);
+}
+
+/* Runtime CPU capability detection for ARM builds.
+ * Returns a bitmask of X265_CPU_* flags. Everything is gated on HAVE_ARMV6:
+ * when that build-time macro is false nothing is probed and 0 is returned. */
+uint32_t cpu_detect(void)
+{
+    int flags = 0;
+
+#if HAVE_ARMV6
+    flags |= X265_CPU_ARMV6;
+
+    // don't do this hack if compiled with -mfpu=neon
+#if !HAVE_NEON
+    // Probe for NEON by running a test routine under a temporary SIGILL handler.
+    // NOTE(review): sigill_handler, jmpbuf and canjump are defined outside this
+    // chunk; presumably the handler jumps back to jmpbuf when canjump is set.
+    static void (* oldsig)(int);
+    oldsig = signal(SIGILL, sigill_handler);
+    if (sigsetjmp(jmpbuf, 1))
+    {
+        // Non-zero return: we arrived here through a jump rather than the
+        // initial call, so the probe did not complete. Restore the previous
+        // handler and report only what was gathered so far (no NEON flag).
+        signal(SIGILL, oldsig);
+        return flags;
+    }
+
+    canjump = 1;
+    PFX(cpu_neon_test)();
+    canjump = 0;
+    signal(SIGILL, oldsig);
+#endif // if !HAVE_NEON
+
+    // Either the build already targets NEON or the probe above ran to completion
+    flags |= X265_CPU_NEON;
+
+    // fast neon -> arm (Cortex-A9) detection relies on user access to the
+    // cycle counter; this assumes ARMv7 performance counters.
+    // NEON requires at least ARMv7, ARMv8 may require changes here, but
+    // hopefully this hacky detection method will have been replaced by then.
+    // Note that there is potential for a race condition if another program or
+    // encoder instance disables or reinits the counters while this test is
+    // using them, which may result in incorrect detection and the counters
+    // stuck enabled.
+    // right now Apple does not seem to support performance counters for this test
+#ifndef __MACH__
+    flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
+#endif
+    // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
+#endif // if HAVE_ARMV6
+    return flags;
+}
+
+#else // if X265_ARCH_X86
+
+/* Fallback used when neither the x86 nor the ARM branch of this file is
+ * compiled: no capability is probed, so the returned flag mask is empty. */
+uint32_t cpu_detect(void)
+{
+    return 0;
+}
+
+#endif // if X265_ARCH_X86
+}
--- a/x265/source/common/cpu.h
+++ b/x265/source/common/cpu.h
@ -0,0 +1,64 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Loren Merritt <lorenm@u.washington.edu>
+ *          Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_CPU_H
+#define X265_CPU_H
+
+#include "common.h"
+
+/* All assembly functions are prefixed with X265_NS (macro expanded).
+ * PFX3 performs the token pasting (prefix ## _ ## name). PFX2 adds one level
+ * of indirection so that its arguments are macro-expanded before PFX3 pastes
+ * them. PFX(name) is the form callers use; it yields <expansion of X265_NS>_name. */
+#define PFX3(prefix, name) prefix ## _ ## name
+#define PFX2(prefix, name) PFX3(prefix, name)
+#define PFX(name)          PFX2(X265_NS, name)
+
+// from cpu-a.asm, if ASM primitives are compiled, else primitives.cpp
+extern "C" void PFX(cpu_emms)(void);
+extern "C" void PFX(safe_intel_cpu_indicator_init)(void);
+
+/* x265_emms() maps to one of two implementations:
+ *  - 32-bit MSVC builds use the _mm_empty() compiler intrinsic;
+ *  - every other configuration (64-bit MSVC, GCC-compatible compilers and
+ *    anything else) calls the out-of-line PFX(cpu_emms) routine declared above.
+ * GCC-style compilers cannot use _mm_empty() directly without compiling all
+ * the source with a fixed CPU arch, which we would like to avoid at the moment. */
+#if _MSC_VER && !_WIN64
+#include <mmintrin.h>
+#define x265_emms() _mm_empty()
+#else
+#define x265_emms() PFX(cpu_emms)()
+#endif
+
+namespace X265_NS {
+/* Detects the capabilities of the host CPU and returns them as a bitmask of
+ * X265_CPU_* flags (defined per architecture in cpu.cpp). */
+uint32_t cpu_detect(void);
+
+/* One entry of the cpu_names table: a short name (at most 15 characters plus
+ * the terminating NUL) paired with a flag mask.
+ * NOTE(review): presumably `flags` is the set of X265_CPU_* bits that the
+ * name stands for - confirm against the table definition. */
+struct cpu_name_t
+{
+    char name[16];
+    uint32_t flags;
+};
+
+/* Table of known CPU names; defined outside this header. */
+extern const cpu_name_t cpu_names[];
+}
+
+#endif // ifndef X265_CPU_H
--- a/x265/source/common/cudata.cpp
+++ b/x265/source/common/cudata.cpp
--- a/x265/source/common/cudata.h
+++ b/x265/source/common/cudata.h
@ -0,0 +1,362 @@
+/*****************************************************************************
+ * Copyright (C) 2015 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_CUDATA_H
+#define X265_CUDATA_H
+
+#include "common.h"
+#include "slice.h"
+#include "mv.h"
+
+namespace X265_NS {
+// private namespace
+
+class FrameData;
+class Slice;
+struct TUEntropyCodingParameters;
+struct CUDataMemPool;
+
+/* Prediction-unit partitioning modes of a CU. The numeric value is used as
+ * the first index into nbPartsTable, partTable and partAddrTable, so the order
+ * of the enumerators must stay in sync with those tables. NUM_SIZES is the
+ * number of modes, not a mode itself. */
+enum PartSize
+{
+    SIZE_2Nx2N, // symmetric motion partition,  2Nx2N
+    SIZE_2NxN,  // symmetric motion partition,  2Nx N
+    SIZE_Nx2N,  // symmetric motion partition,   Nx2N
+    SIZE_NxN,   // symmetric motion partition,   Nx N
+    SIZE_2NxnU, // asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2)
+    SIZE_2NxnD, // asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2)
+    SIZE_nLx2N, // asymmetric motion partition, ( N/2)x2N + (3N/2)x2N
+    SIZE_nRx2N, // asymmetric motion partition, (3N/2)x2N + ( N/2)x2N
+    NUM_SIZES
+};
+
+/* Prediction mode of a CU, stored per partition in CUData::m_predMode.
+ * The values are bit flags: MODE_SKIP contains the MODE_INTER bit, so a
+ * skipped CU also satisfies (mode & MODE_INTER), while an exact comparison
+ * against MODE_SKIP distinguishes it from a plain inter CU. */
+enum PredMode
+{
+    MODE_NONE  = 0,
+    MODE_INTER = (1 << 0),
+    MODE_INTRA = (1 << 1),
+    MODE_SKIP  = (1 << 2) | MODE_INTER
+};
+
+// motion vector predictor direction used in AMVP; identifies which neighbour
+// of the current PU a candidate comes from (passed as the `dir` argument of
+// CUData::getInterNeighbourMV)
+enum MVP_DIR
+{
+    MD_LEFT = 0,    // MVP of left block
+    MD_ABOVE,       // MVP of above block
+    MD_ABOVE_RIGHT, // MVP of above right block
+    MD_BELOW_LEFT,  // MVP of below left block
+    MD_ABOVE_LEFT,  // MVP of above left block
+    MD_COLLOCATED   // MVP of temporal neighbour
+};
+
+/* Geometry of one CU node of the CTU quad-tree. CUData::calcCTUGeoms() fills
+ * an array of MAX_GEOMS of these; the four children of a node are reached
+ * from the node's own address as (&node + childOffset + i), i = 0..3. */
+struct CUGeom
+{
+    // bit flags stored in the `flags` member
+    enum {
+        INTRA           = 1<<0, // CU is intra predicted
+        PRESENT         = 1<<1, // CU is not completely outside the frame
+        SPLIT_MANDATORY = 1<<2, // CU split is mandatory if CU is inside frame and can be split
+        LEAF            = 1<<3, // CU is a leaf node of the CTU
+        SPLIT           = 1<<4, // CU is currently split in four child CUs.
+    };
+    
+    // Number of nodes in a quad-tree with four levels: (1 + 4 + 16 + 64) = 85.
+    enum { MAX_GEOMS = 85 };
+
+    uint32_t log2CUSize;    // Log of the CU size.
+    uint32_t childOffset;   // offset of the first child CU from current CU
+    uint32_t absPartIdx;    // Part index of this CU in terms of 4x4 blocks.
+    uint32_t numPartitions; // Number of 4x4 blocks in the CU
+    uint32_t flags;         // CU flags.
+    uint32_t depth;         // depth of this CU relative from CTU
+};
+
+/* A motion vector together with the index of the reference picture it points
+ * into. Filled by CUData::getMvField() and used for the merge candidate
+ * lists returned by CUData::getInterMergeCandidates(). */
+struct MVField
+{
+    MV  mv;     // motion vector
+    int refIdx; // reference index belonging to mv
+};
+
+// Structure that keeps the neighbour's MV information.
+struct InterNeighbourMV
+{
+    // Neighbour MV. The index represents the list.
+    MV mv[2];
+
+    // Collocated right bottom CU addr.
+    uint32_t cuAddr[2];
+
+    // For spatial prediction, this field contains the reference index
+    // in each list (-1 if not available).
+    //
+    // For temporal prediction, the first value is used for the
+    // prediction with list 0. The second value is used for the prediction
+    // with list 1. For each value, the first four bits are the reference index
+    // associated to the PMV, and the fifth bit is the list associated to the PMV.
+    //
+    // unifiedRef aliases both 16-bit entries as one 32-bit value, so "both
+    // reference indices are -1" can be tested with a single comparison:
+    // if both reference indices are -1, then unifiedRef is also -1
+    union { int16_t refIdx[2]; int32_t unifiedRef; };
+};
+
+typedef void(*cucopy_t)(uint8_t* dst, uint8_t* src); // dst and src are aligned to MIN(size, 32)
+typedef void(*cubcast_t)(uint8_t* dst, uint8_t val); // dst is aligned to MIN(size, 32)
+
+// Partition count table, index represents partitioning mode (a PartSize
+// value); each entry is the number of prediction units that mode produces.
+const uint32_t nbPartsTable[8] = { 1, 2, 2, 4, 2, 2, 2, 2 };
+
+// Partition table.
+// First index is partitioning mode. Second index is partition index.
+// Third index is 0 for partition sizes, 1 for partition offsets. The
+// sizes and offsets are encoded as two packed 4-bit values (X,Y): X in the
+// high nibble, Y in the low nibble.
+// X and Y represent 1/4 fractions of the block size.
+// Example: SIZE_2NxN, partition 1 is { 0x42, 0x02 }: width 4/4, height 2/4,
+// placed at offset x = 0, y = 2/4 of the CU. Unused partition slots are zero.
+const uint32_t partTable[8][4][2] =
+{
+    //        XY
+    { { 0x44, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2Nx2N.
+    { { 0x42, 0x00 }, { 0x42, 0x02 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxN.
+    { { 0x24, 0x00 }, { 0x24, 0x20 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_Nx2N.
+    { { 0x22, 0x00 }, { 0x22, 0x20 }, { 0x22, 0x02 }, { 0x22, 0x22 } }, // SIZE_NxN.
+    { { 0x41, 0x00 }, { 0x43, 0x01 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnU.
+    { { 0x43, 0x00 }, { 0x41, 0x03 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnD.
+    { { 0x14, 0x00 }, { 0x34, 0x10 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_nLx2N.
+    { { 0x34, 0x00 }, { 0x14, 0x30 }, { 0x00, 0x00 }, { 0x00, 0x00 } }  // SIZE_nRx2N.
+};
+
+// Partition Address table.
+// First index is partitioning mode. Second index is partition address.
+// CUData::getPUOffset() scales an entry by the CU's partition count and then
+// shifts it right by 4, i.e. the entries are expressed in 1/16ths of a CU.
+// NOTE(review): the values look like z-order indices on a 4x4 grid - confirm.
+const uint32_t partAddrTable[8][4] =
+{
+    { 0x00, 0x00, 0x00, 0x00 }, // SIZE_2Nx2N.
+    { 0x00, 0x08, 0x08, 0x08 }, // SIZE_2NxN.
+    { 0x00, 0x04, 0x04, 0x04 }, // SIZE_Nx2N.
+    { 0x00, 0x04, 0x08, 0x0C }, // SIZE_NxN.
+    { 0x00, 0x02, 0x02, 0x02 }, // SIZE_2NxnU.
+    { 0x00, 0x0A, 0x0A, 0x0A }, // SIZE_2NxnD.
+    { 0x00, 0x01, 0x01, 0x01 }, // SIZE_nLx2N.
+    { 0x00, 0x05, 0x05, 0x05 }  // SIZE_nRx2N.
+};
+
+// Holds part data for a CU of a given size, from an 8x8 CU to a CTU.
+// Most members are "per-part" arrays with one element per 4x4 partition; the
+// setXxxSubParts() helpers broadcast a single value over a range of them
+// through the m_partSet / s_partSet function pointers.
+class CUData
+{
+public:
+
+    static cubcast_t s_partSet[NUM_FULL_DEPTH]; // pointer to broadcast set functions per absolute depth
+    static uint32_t  s_numPartInCUSize;
+
+    FrameData*    m_encData;
+    const Slice*  m_slice;
+
+    cucopy_t      m_partCopy;         // pointer to function that copies m_numPartitions elements
+    cubcast_t     m_partSet;          // pointer to function that sets m_numPartitions elements
+    cucopy_t      m_subPartCopy;      // pointer to function that copies m_numPartitions/4 elements, may be NULL
+    cubcast_t     m_subPartSet;       // pointer to function that sets m_numPartitions/4 elements, may be NULL
+
+    uint32_t      m_cuAddr;           // address of CTU within the picture in raster order
+    uint32_t      m_absIdxInCTU;      // address of CU within its CTU in Z scan order
+    uint32_t      m_cuPelX;           // CU position within the picture, in pixels (X)
+    uint32_t      m_cuPelY;           // CU position within the picture, in pixels (Y)
+    uint32_t      m_numPartitions;    // maximum number of 4x4 partitions within this CU
+
+    uint32_t      m_chromaFormat;
+    uint32_t      m_hChromaShift;
+    uint32_t      m_vChromaShift;
+
+    /* Per-part data, stored contiguously */
+    int8_t*       m_qp;               // array of QP values
+    uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems redundant to depth
+    uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)
+    uint8_t*      m_tqBypass;         // array of CU lossless flags
+    int8_t*       m_refIdx[2];        // array of motion reference indices per list
+    uint8_t*      m_cuDepth;          // array of depths
+    uint8_t*      m_predMode;         // array of prediction modes
+    uint8_t*      m_partSize;         // array of partition sizes
+    uint8_t*      m_mergeFlag;        // array of merge flags
+    uint8_t*      m_interDir;         // array of inter directions
+    uint8_t*      m_mvpIdx[2];        // array of motion vector predictor candidates or merge candidate indices [0]
+    uint8_t*      m_tuDepth;          // array of transform indices
+    uint8_t*      m_transformSkip[3]; // array of transform skipping flags per plane
+    uint8_t*      m_cbf[3];           // array of coded block flags (CBF) per plane
+    uint8_t*      m_chromaIntraDir;   // array of intra directions (chroma)
+    // The 21 one-byte arrays declared above (array members counted per element);
+    // must be updated whenever a per-part array is added or removed.
+    enum { BytesPerPartition = 21 };  // combined sizeof() of all per-part data
+
+    coeff_t*      m_trCoeff[3];       // transformed coefficient buffer per plane
+
+    MV*           m_mv[2];            // array of motion vectors per list
+    MV*           m_mvd[2];           // array of coded motion vector deltas per list
+    enum { TMVP_UNIT_MASK = 0xF0 };  // mask for mapping an index into a compressed (reference) MV field
+
+    const CUData* m_cuAboveLeft;      // pointer to above-left neighbor CTU
+    const CUData* m_cuAboveRight;     // pointer to above-right neighbor CTU
+    const CUData* m_cuAbove;          // pointer to above neighbor CTU
+    const CUData* m_cuLeft;           // pointer to left neighbor CTU
+
+    CUData();
+
+    void     initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance);
+    static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
+
+    void     initCTU(const Frame& frame, uint32_t cuAddr, int qp);
+    void     initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp);
+    void     initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
+
+    void     copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
+    void     setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx);
+    void     copyToPic(uint32_t depth) const;
+
+    /* RD-0 methods called only from encodeResidue */
+    void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom);
+    void     updatePic(uint32_t depth) const;
+
+    /* these setters cover the whole CU (m_numPartitions elements) via m_partSet */
+    void     setPartSizeSubParts(PartSize size)    { m_partSet(m_partSize, (uint8_t)size); }
+    void     setPredModeSubParts(PredMode mode)    { m_partSet(m_predMode, (uint8_t)mode); }
+    void     clearCbf()                            { m_partSet(m_cbf[0], 0); m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0); }
+
+    /* these functions all take depth as an absolute depth from CTU, it is used to calculate the number of parts to copy */
+    void     setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth)                    { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); }
+    void     setTUDepthSubParts(uint8_t tuDepth, uint32_t absPartIdx, uint32_t depth)         { s_partSet[depth](m_tuDepth + absPartIdx, tuDepth); }
+    void     setLumaIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth)        { s_partSet[depth](m_lumaIntraDir + absPartIdx, dir); }
+    void     setChromIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth)       { s_partSet[depth](m_chromaIntraDir + absPartIdx, dir); }
+    void     setCbfSubParts(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_cbf[ttype] + absPartIdx, cbf); }
+    /* the *PartRange variants take an explicit element count instead of a depth */
+    void     setCbfPartRange(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes) { memset(m_cbf[ttype] + absPartIdx, cbf, coveredPartIdxes); }
+    void     setTransformSkipSubParts(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_transformSkip[ttype] + absPartIdx, tskip); }
+    void     setTransformSkipPartRange(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes) { memset(m_transformSkip[ttype] + absPartIdx, tskip, coveredPartIdxes); }
+
+    bool     setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth);
+
+    void     setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx);
+    void     setPUMv(int list, const MV& mv, int absPartIdx, int puIdx);
+    void     setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
+
+    /* CBF bytes hold one flag bit per transform depth; getCbf() extracts the bit for tuDepth */
+    uint8_t  getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; }
+    uint8_t  getQtRootCbf(uint32_t absPartIdx) const                             { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; }
+    int8_t   getRefQP(uint32_t currAbsIdxInCTU) const;
+    uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*candMvField)[2], uint8_t* candDir) const;
+    void     clipMv(MV& outMV) const;
+    int      getPMV(InterNeighbourMV *neighbours, uint32_t reference_list, uint32_t refIdx, MV* amvpCand, MV* pmv) const;
+    void     getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const;
+    void     getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const;
+    void     getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const;
+    /* Returns a bitmask with bit refIdx[0] set when list 0 is used and bit (16 + refIdx[1]) set when list 1 is used.
+     * NOTE(review): m_refIdx is signed; when a list is unused and its refIdx is negative the list-0 shift count
+     * would be negative - confirm callers only rely on this for valid indices. */
+    uint32_t getBestRefIdx(uint32_t subPartIdx) const { return ((m_interDir[subPartIdx] & 1) << m_refIdx[0][subPartIdx]) | 
+                                                              (((m_interDir[subPartIdx] >> 1) & 1) << (m_refIdx[1][subPartIdx] + 16)); }
+    /* partAddrTable entries are in 1/16ths of a CU: scale by the partition count implied by the CU depth, then drop the 4 fraction bits */
+    uint32_t getPUOffset(uint32_t puIdx, uint32_t absPartIdx) const { return (partAddrTable[(int)m_partSize[absPartIdx]][puIdx] << (g_unitSizeDepth - m_cuDepth[absPartIdx]) * 2) >> 4; }
+
+    uint32_t getNumPartInter(uint32_t absPartIdx) const              { return nbPartsTable[(int)m_partSize[absPartIdx]]; }
+    bool     isIntra(uint32_t absPartIdx) const   { return m_predMode[absPartIdx] == MODE_INTRA; }
+    bool     isInter(uint32_t absPartIdx) const   { return !!(m_predMode[absPartIdx] & MODE_INTER); } // also true for MODE_SKIP
+    bool     isSkipped(uint32_t absPartIdx) const { return m_predMode[absPartIdx] == MODE_SKIP; }
+    bool     isBipredRestriction() const          { return m_log2CUSize[0] == 3 && m_partSize[0] != SIZE_2Nx2N; } // 8x8 CU split into several PUs
+
+    void     getPartIndexAndSize(uint32_t puIdx, uint32_t& absPartIdx, int& puWidth, int& puHeight) const;
+    void     getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& mvField) const;
+
+    void     getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const;
+    int      getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const;
+
+    uint32_t getSCUAddr() const                  { return (m_cuAddr << g_unitSizeDepth * 2) + m_absIdxInCTU; }
+    uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const;
+    uint32_t getCtxSkipFlag(uint32_t absPartIdx) const;
+    void     getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const;
+
+    /* neighbour lookups: return the CUData holding the neighbouring partition and write that partition's index
+     * through the first argument. The result can be NULL (callers in deblock.cpp test for it). */
+    const CUData* getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const;
+    const CUData* getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const;
+    const CUData* getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const;
+    const CUData* getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const;
+    const CUData* getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const;
+
+    const CUData* getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t currAbsIdxInCTU) const;
+    const CUData* getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t currAbsIdxInCTU) const;
+
+    const CUData* getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const;
+    const CUData* getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const;
+
+protected:
+
+    template<typename T>
+    void setAllPU(T *p, const T& val, int absPartIdx, int puIdx);
+
+    int8_t getLastCodedQP(uint32_t absPartIdx) const;
+    int  getLastValidPartIdx(int absPartIdx) const;
+
+    bool hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const;
+
+    /* Check whether the current PU and a spatial neighboring PU are in same merge region
+     * (regions are compared at a granularity of 4 units in each dimension: coordinates >> 2) */
+    bool isDiffMER(int xN, int yN, int xP, int yP) const { return ((xN >> 2) != (xP >> 2)) || ((yN >> 2) != (yP >> 2)); }
+
+    // add possible motion vector predictor candidates
+    bool getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const;
+    bool getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t reference_list, uint32_t refIdx) const;
+    void getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const;
+
+    bool getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int absPartIdx) const;
+    bool getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const;
+
+    MV scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const;
+
+    void     deriveLeftRightTopIdx(uint32_t puIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const;
+
+    uint32_t deriveCenterIdx(uint32_t puIdx) const;
+    uint32_t deriveRightBottomIdx(uint32_t puIdx) const;
+    uint32_t deriveLeftBottomIdx(uint32_t puIdx) const;
+};
+
+// TU settings for entropy encoding; filled in by
+// CUData::getTUEntropyCodingParameters() for one transform unit.
+// NOTE(review): field meanings below are inferred from their names - confirm
+// against the entropy coder.
+struct TUEntropyCodingParameters
+{
+    const uint16_t *scan;                        // presumably the coefficient scan order table
+    const uint16_t *scanCG;                      // presumably the coefficient-group scan order table
+    ScanType        scanType;
+    uint32_t        log2TrSizeCG;
+    uint32_t        firstSignificanceMapContext;
+};
+
+/* Backing storage for a group of CUData instances of the same depth.
+ * Three blocks are allocated, each sized for numInstances CUs:
+ *   trCoeffMemBlock - coefficient buffers for one luma and two chroma planes
+ *   charMemBlock    - CUData::BytesPerPartition bytes per 4x4 partition
+ *   mvMemBlock      - four MV entries per 4x4 partition
+ * Usage: create() once, hand the pool to CUData::initialize(), destroy() when
+ * done. destroy() is safe to call after a failed create() and more than once. */
+struct CUDataMemPool
+{
+    uint8_t* charMemBlock;
+    coeff_t* trCoeffMemBlock;
+    MV*      mvMemBlock;
+
+    CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; }
+
+    /* Allocates the three blocks for CUs at the given depth and chroma format.
+     * Returns true on success. On allocation failure returns false; blocks
+     * allocated before the failing one are kept and must be released by
+     * calling destroy(). */
+    bool create(uint32_t depth, uint32_t csp, uint32_t numInstances)
+    {
+        uint32_t numPartition = NUM_4x4_PARTITIONS >> (depth * 2);
+        uint32_t cuSize = g_maxCUSize >> depth;
+        uint32_t sizeL = cuSize * cuSize;
+        uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
+        CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances);
+        CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition);
+        CHECKED_MALLOC(mvMemBlock, MV, numPartition * 4 * numInstances);
+        return true;
+
+    fail: // jump target used by CHECKED_MALLOC when an allocation fails
+        return false;
+    }
+
+    /* Releases all blocks. The pointers are reset afterwards so that a
+     * repeated destroy() (or a destroy() following a later failed create())
+     * cannot free the same memory twice or leave dangling pointers behind. */
+    void destroy()
+    {
+        X265_FREE(trCoeffMemBlock);
+        X265_FREE(mvMemBlock);
+        X265_FREE(charMemBlock);
+        trCoeffMemBlock = NULL;
+        mvMemBlock = NULL;
+        charMemBlock = NULL;
+    }
+};
+}
+
+#endif // ifndef X265_CUDATA_H
--- a/x265/source/common/dct.cpp
+++ b/x265/source/common/dct.cpp
--- a/x265/source/common/deblock.cpp
+++ b/x265/source/common/deblock.cpp
@ -0,0 +1,555 @@
+/*****************************************************************************
+* Copyright (C) 2013 x265 project
+*
+* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#include "common.h"
+#include "deblock.h"
+#include "framedata.h"
+#include "picyuv.h"
+#include "slice.h"
+#include "mv.h"
+
+using namespace X265_NS;
+
+#define DEBLOCK_SMALLEST_BLOCK  8
+#define DEFAULT_INTRA_TC_OFFSET 2
+
+/* Entry point for deblocking one CTU in a single edge direction.
+ * Provides a zeroed boundary-strength scratch buffer (one byte per 4x4
+ * partition of the CTU) and starts the recursive walk at the root geometry. */
+void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
+{
+    uint8_t bs[MAX_NUM_PARTITIONS];
+
+    // only the partitions covered by this CTU geometry need to start at zero
+    memset(bs, 0, cuGeom.numPartitions * sizeof(bs[0]));
+    deblockCU(ctu, cuGeom, dir, bs);
+}
+
+/* Initial boundary strength for the CU's own left (vertical pass) or top
+ * (horizontal pass) edge: 2 when a neighbouring partition exists across that
+ * edge, 0 when the edge lies on the picture border or no neighbour is
+ * returned by the lookup. */
+static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
+{
+    uint32_t neighbourIdx; // written by the lookup, not needed here
+    const CUData* neighbour = NULL;
+
+    if (dir == Deblock::EDGE_VER)
+    {
+        // a left neighbour can only exist away from picture column 0
+        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
+            neighbour = cu->getPULeft(neighbourIdx, absPartIdx);
+    }
+    else if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
+    {
+        // likewise an above neighbour can only exist away from picture row 0
+        neighbour = cu->getPUAbove(neighbourIdx, absPartIdx);
+    }
+
+    return neighbour ? 2 : 0;
+}
+
+/* CU-based deblocking filter process.
+ * Walks the CU quad-tree below cuGeom; for every coded CU it marks the PU, TU
+ * and CU edges in blockStrength, converts the marks into final boundary
+ * strengths and then filters luma (and, where applicable, chroma) edges.
+ * param dir  the direction of the edges handled in this pass (horizontal/vertical) */
+void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
+{
+    const uint32_t cuPartIdx = cuGeom.absPartIdx;
+    const uint32_t cuDepth = cuGeom.depth;
+
+    // no prediction mode recorded for this position: nothing to filter
+    if (cu->m_predMode[cuPartIdx] == MODE_NONE)
+        return;
+
+    // the coded CU is deeper than this geometry node: recurse into the
+    // children that are present in the picture
+    if (cu->m_cuDepth[cuPartIdx] > cuDepth)
+    {
+        const CUGeom* firstChild = &cuGeom + cuGeom.childOffset;
+        for (uint32_t child = 0; child < 4; child++)
+        {
+            if (firstChild[child].flags & CUGeom::PRESENT)
+                deblockCU(cu, firstChild[child], dir, blockStrength);
+        }
+        return;
+    }
+
+    // step 1: mark candidate edges (PU edges, then TU edges, then the CU's own edge)
+    const uint32_t unitsPerSide = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
+    setEdgefilterPU(cu, cuPartIdx, dir, blockStrength, unitsPerSide);
+    setEdgefilterTU(cu, cuPartIdx, 0, dir, blockStrength);
+    setEdgefilterMultiple(cu, cuPartIdx, dir, 0, bsCuEdge(cu, cuPartIdx, dir), blockStrength, unitsPerSide);
+
+    // step 2: turn the marks into boundary strengths; partitions whose index
+    // has bit `dir` set are skipped
+    const uint32_t endPartIdx = cuPartIdx + cuGeom.numPartitions;
+    for (uint32_t idx = cuPartIdx; idx < endPartIdx; idx++)
+    {
+        if (idx & (1 << dir))
+            continue;
+
+        if (blockStrength[idx])
+            blockStrength[idx] = getBoundaryStrength(cu, dir, idx, blockStrength);
+    }
+
+    // step 3: filter one edge every DEBLOCK_SMALLEST_BLOCK pixels; chroma is
+    // filtered only on edges that also fall on the chroma deblocking grid
+    const bool isVertical = (dir == EDGE_VER);
+    const uint32_t edgeStep = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
+    const uint32_t chromaShift = isVertical ? cu->m_hChromaShift : cu->m_vChromaShift;
+    const uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << chromaShift) >> LOG2_UNIT_SIZE) - 1;
+    const uint32_t firstEdge = (isVertical ? g_zscanToPelX[cuPartIdx] : g_zscanToPelY[cuPartIdx]) >> LOG2_UNIT_SIZE;
+
+    for (uint32_t edge = 0; edge < unitsPerSide; edge += edgeStep)
+    {
+        edgeFilterLuma(cu, cuPartIdx, cuDepth, dir, edge, blockStrength);
+
+        if (cu->m_chromaFormat != X265_CSP_I400 && ((firstEdge + edge) & chromaMask) == 0)
+            edgeFilterChroma(cu, cuPartIdx, cuDepth, dir, edge, blockStrength);
+    }
+}
+
+/* Maps (edge, unit along the edge) inside a CU to the z-scan index of the
+ * corresponding 4x4 partition. The position is computed in raster order,
+ * relative to the raster position of absPartIdx, and converted back to z-scan.
+ * For a non-zero dir the edge index selects the row and baseUnitIdx the
+ * column; for dir == 0 the roles are swapped. */
+static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
+{
+    // number of 4x4 units per CTU row, i.e. the raster stride
+    const uint32_t stride = cu->m_slice->m_sps->numPartInCUSize;
+
+    return dir ? g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * stride + baseUnitIdx]
+               : g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * stride + edgeIdx];
+}
+
+/* Writes `value` into blockStrength for each of the numUnits 4x4 units lying
+ * along edge number edgeIdx of the block that starts at scanIdx. */
+void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
+{
+    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
+
+    for (uint32_t unit = 0; unit < numUnits; unit++)
+        blockStrength[calcBsIdx(cu, scanIdx, dir, edgeIdx, unit)] = value;
+}
+
+/* Marks the first edge of every transform unit inside the CU with strength 2.
+ * Descends the transform quad-tree until the recorded TU depth is reached;
+ * the leaf TU then has its edge 0 marked along its full length. */
+void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
+{
+    const uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
+
+    if (cu->m_tuDepth[absPartIdx] <= tuDepth)
+    {
+        // leaf TU: mark its leading edge
+        setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, 1 << (log2TrSize - LOG2_UNIT_SIZE));
+        return;
+    }
+
+    // split TU: each quadrant covers a quarter of the partitions, laid out
+    // consecutively in z-scan order
+    const uint32_t partsPerQuadrant = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
+    for (uint32_t quadrant = 0; quadrant < 4; quadrant++)
+        setEdgefilterTU(cu, absPartIdx + quadrant * partsPerQuadrant, tuDepth + 1, dir, blockStrength);
+}
+
+/* Marks the internal prediction-unit edge of a CU with strength 1.
+ * Each partition mode has at most one internal edge per direction: at half
+ * the CU for the symmetric modes and at one or three quarters for the
+ * asymmetric ones. SIZE_NxN has an internal edge in both directions;
+ * SIZE_2Nx2N (and any unknown mode) has none. */
+void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
+{
+    const uint32_t half = numUnits >> 1;
+    const uint32_t quarter = numUnits >> 2;
+
+    uint32_t edgePos;  // position of the internal edge, in 4x4 units
+    int32_t  edgeDir;  // the only pass direction in which that edge is marked
+
+    switch (cu->m_partSize[absPartIdx])
+    {
+    case SIZE_NxN:   edgePos = half;               edgeDir = dir;      break;
+    case SIZE_2NxN:  edgePos = half;               edgeDir = EDGE_HOR; break;
+    case SIZE_Nx2N:  edgePos = half;               edgeDir = EDGE_VER; break;
+    case SIZE_2NxnU: edgePos = quarter;            edgeDir = EDGE_HOR; break;
+    case SIZE_nLx2N: edgePos = quarter;            edgeDir = EDGE_VER; break;
+    case SIZE_2NxnD: edgePos = numUnits - quarter; edgeDir = EDGE_HOR; break;
+    case SIZE_nRx2N: edgePos = numUnits - quarter; edgeDir = EDGE_VER; break;
+
+    case SIZE_2Nx2N:
+    default:
+        return;
+    }
+
+    if (edgeDir == dir)
+        setEdgefilterMultiple(cu, absPartIdx, dir, edgePos, 1, blockStrength, numUnits);
+}
+
+/* Derive the deblocking boundary strength for one edge unit.
+ * \param cuQ            CU holding the unit on the current (Q) side of the edge
+ * \param dir            EDGE_VER selects the left neighbour as P, otherwise the above neighbour
+ * \param partQ          partition index of the unit inside cuQ
+ * \param blockStrength  provisional edge marks; a value above 1 at partQ enables the
+ *                       coded-coefficient test below
+ * \return 2 if either side is intra coded, 1 if coefficients, references or motion
+ *         differ across the edge, 0 otherwise */
+uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
+{
+    // Locate the neighbouring partition P on the other side of the edge
+    uint32_t partP;
+    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
+
+    // Intra on either side : BS = 2
+    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
+        return 2;
+
+    // Marked edge with coded luma coefficients on either side : BS = 1
+    if (blockStrength[partQ] > 1 &&
+        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
+         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
+        return 1;
+
+    static const MV zeroMv(0, 0);
+    const Slice* const sliceQ = cuQ->m_slice;
+    const Slice* const sliceP = cuP->m_slice;
+
+    /* A partition that does not predict from a list is expected to carry a negative
+     * reference index for it (NOTE(review): confirm against CUData). Such an index
+     * must not be used to subscript m_refFrameList; treat it as "no reference". */
+    const int refIdxP0 = cuP->m_refIdx[0][partP];
+    const int refIdxQ0 = cuQ->m_refIdx[0][partQ];
+    const Frame* refP0 = refIdxP0 >= 0 ? sliceP->m_refFrameList[0][refIdxP0] : NULL;
+    const Frame* refQ0 = refIdxQ0 >= 0 ? sliceQ->m_refFrameList[0][refIdxQ0] : NULL;
+    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
+    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
+
+    if (sliceQ->isInterP() && sliceP->isInterP())
+    {
+        // Single list: different reference, or a vector component differing by 4 or more
+        return ((refP0 != refQ0) ||
+                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
+    }
+
+    // (sliceQ->isInterB() || sliceP->isInterB())
+    const int refIdxP1 = cuP->m_refIdx[1][partP];
+    const int refIdxQ1 = cuQ->m_refIdx[1][partQ];
+    const Frame* refP1 = refIdxP1 >= 0 ? sliceP->m_refFrameList[1][refIdxP1] : NULL;
+    const Frame* refQ1 = refIdxQ1 >= 0 ? sliceQ->m_refFrameList[1][refIdxQ1] : NULL;
+    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
+    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
+
+    // Both sides use the same pair of reference frames, possibly with the lists swapped
+    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
+    {
+        if (refP0 != refP1) // Different L0 & L1
+        {
+            if (refP0 == refQ0)
+                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
+                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
+            else
+                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
+                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
+        }
+        else // Same L0 & L1: both the straight and the crossed pairing must differ
+        {
+            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
+                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
+                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
+                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
+        }
+    }
+
+    // The two sides reference different frames
+    return 1;
+}
+
+/* |s[-3] - 2 * s[-2] + s[-1]| over the three samples preceding src, spaced by 'offset' */
+static inline int32_t calcDP(pixel* src, intptr_t offset)
+{
+    const int32_t far = src[-offset * 3];
+    const int32_t mid = src[-offset * 2];
+    const int32_t near = src[-offset];
+
+    return abs(far - 2 * mid + near);
+}
+
+/* |s[0] - 2 * s[1] + s[2]| over the three samples starting at src, spaced by 'offset' */
+static inline int32_t calcDQ(pixel* src, intptr_t offset)
+{
+    const int32_t near = src[0];
+    const int32_t mid = src[offset];
+    const int32_t far = src[offset * 2];
+
+    return abs(near - 2 * mid + far);
+}
+
+/* Decide, for the line of samples crossing the edge at src, whether the strong luma
+ * filter applies: the outermost-to-innermost differences on both sides must sum to
+ * less than beta/8 and the step across the edge must be below (5 * tc + 1) / 2. */
+static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
+{
+    const int16_t p3 = (int16_t)src[-offset * 4];
+    const int16_t p0 = (int16_t)src[-offset];
+    const int16_t q0 = (int16_t)src[0];
+    const int16_t q3 = (int16_t)src[offset * 3];
+
+    const int32_t sideActivity = abs(p3 - p0) + abs(q3 - q0);
+    if (sideActivity >= (beta >> 3))
+        return false;
+
+    return abs(p0 - q0) < ((tc * 5 + 1) >> 1);
+}
+
+/* Strong luma deblocking filter applied to UNIT_SIZE lines crossing one edge.
+ * \param src      pointer to the first Q-side sample of the first line
+ * \param srcStep  distance between consecutive lines
+ * \param offset   distance between consecutive samples across the edge
+ * \param tc       tc value; each sample changes by at most 2 * tc
+ * \param maskP    all-ones to filter the P side, zero to leave it unchanged
+ * \param maskQ    all-ones to filter the Q side, zero to leave it unchanged */
+static inline void pelFilterLumaStrong(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
+{
+    const int32_t limit = 2 * tc;
+    const int32_t limitP = (limit & maskP);
+    const int32_t limitQ = (limit & maskQ);
+
+    for (int32_t line = 0; line < UNIT_SIZE; line++)
+    {
+        pixel* pix = src + line * srcStep;
+
+        // Read all eight samples first, the filter taps use unmodified values
+        const int16_t p3 = (int16_t)pix[-offset * 4];
+        const int16_t p2 = (int16_t)pix[-offset * 3];
+        const int16_t p1 = (int16_t)pix[-offset * 2];
+        const int16_t p0 = (int16_t)pix[-offset];
+        const int16_t q0 = (int16_t)pix[0];
+        const int16_t q1 = (int16_t)pix[offset];
+        const int16_t q2 = (int16_t)pix[offset * 2];
+        const int16_t q3 = (int16_t)pix[offset * 3];
+
+        // Clipped corrections for the three samples on each side
+        const int32_t dP2 = x265_clip3(-limitP, limitP, ((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2);
+        const int32_t dP1 = x265_clip3(-limitP, limitP, ((p2 + p1 + p0 + q0 + 2) >> 2) - p1);
+        const int32_t dP0 = x265_clip3(-limitP, limitP, ((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0);
+        const int32_t dQ0 = x265_clip3(-limitQ, limitQ, ((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0);
+        const int32_t dQ1 = x265_clip3(-limitQ, limitQ, ((p0 + q0 + q1 + q2 + 2) >> 2) - q1);
+        const int32_t dQ2 = x265_clip3(-limitQ, limitQ, ((p0 + q0 + q1 + 3 * q2 + 2 * q3 + 4) >> 3) - q2);
+
+        pix[-offset * 3] = (pixel)(dP2 + p2);
+        pix[-offset * 2] = (pixel)(dP1 + p1);
+        pix[-offset]     = (pixel)(dP0 + p0);
+        pix[0]           = (pixel)(dQ0 + q0);
+        pix[offset]      = (pixel)(dQ1 + q1);
+        pix[offset * 2]  = (pixel)(dQ2 + q2);
+    }
+}
+
+/* Weak luma deblocking filter applied to UNIT_SIZE lines crossing one edge.
+ * \param src      pointer to the first Q-side sample of the first line
+ * \param srcStep  distance between consecutive lines
+ * \param offset   distance between consecutive samples across the edge
+ * \param tc       tc value; lines whose raw correction reaches 10 * tc are skipped
+ * \param maskP    all-ones to filter the P side, zero to leave it unchanged
+ * \param maskQ    all-ones to filter the Q side, zero to leave it unchanged
+ * \param maskP1   non-zero to also adjust the second sample on the P side
+ * \param maskQ1   non-zero to also adjust the second sample on the Q side */
+static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
+                                 int32_t maskP1, int32_t maskQ1)
+{
+    const int32_t cutoff = tc * 10;
+    const int32_t halfTc = tc >> 1;
+
+    // The second sample is only touched on a side that is filtered at all
+    maskP1 &= maskP;
+    maskQ1 &= maskQ;
+
+    for (int32_t line = 0; line < UNIT_SIZE; line++)
+    {
+        pixel* pix = src + line * srcStep;
+
+        const int16_t p1 = (int16_t)pix[-offset * 2];
+        const int16_t p0 = (int16_t)pix[-offset];
+        const int16_t q0 = (int16_t)pix[0];
+        const int16_t q1 = (int16_t)pix[offset];
+
+        int32_t delta = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
+        if (abs(delta) >= cutoff)
+            continue;
+
+        delta = x265_clip3(-tc, tc, delta);
+
+        pix[-offset] = x265_clip(p0 + (delta & maskP));
+        pix[0] = x265_clip(q0 - (delta & maskQ));
+
+        if (maskP1)
+        {
+            const int16_t p2 = (int16_t)pix[-offset * 3];
+            const int32_t deltaP1 = x265_clip3(-halfTc, halfTc, ((((p2 + p0 + 1) >> 1) - p1 + delta) >> 1));
+            pix[-offset * 2] = x265_clip(p1 + deltaP1);
+        }
+
+        if (maskQ1)
+        {
+            const int16_t q2 = (int16_t)pix[offset * 2];
+            const int32_t deltaQ1 = x265_clip3(-halfTc, halfTc, ((((q2 + q0 + 1) >> 1) - q1 - delta) >> 1));
+            pix[offset] = x265_clip(q1 + deltaQ1);
+        }
+    }
+}
+
+/* Chroma deblocking filter applied to UNIT_SIZE lines crossing one edge; only the
+ * sample nearest the edge on each side is modified.
+ * \param src      pointer to the first Q-side sample of the first line
+ * \param srcStep  distance between consecutive lines
+ * \param offset   distance between consecutive samples across the edge
+ * \param tc       tc value, the correction is clipped to [-tc, tc]
+ * \param maskP    all-ones to filter the P side, zero to leave it unchanged
+ * \param maskQ    all-ones to filter the Q side, zero to leave it unchanged */
+static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
+{
+    for (int32_t line = 0; line < UNIT_SIZE; line++)
+    {
+        pixel* pix = src + line * srcStep;
+
+        const int16_t p1 = (int16_t)pix[-offset * 2];
+        const int16_t p0 = (int16_t)pix[-offset];
+        const int16_t q0 = (int16_t)pix[0];
+        const int16_t q1 = (int16_t)pix[offset];
+
+        const int32_t delta = x265_clip3(-tc, tc, ((((q0 - p0) * 4) + p1 - q1 + 4) >> 3));
+
+        pix[-offset] = x265_clip(p0 + (delta & maskP));
+        pix[0] = x265_clip(q0 - (delta & maskQ));
+    }
+}
+
+/* Filter one luma edge of a CU in the reconstructed picture.
+ * \param cuQ            CU on the current (Q) side of the edge
+ * \param absPartIdx     partition index of the CU within its CTU
+ * \param depth          CU depth; the edge covers (numPartInCUSize >> depth) units
+ * \param dir            EDGE_VER or EDGE_HOR
+ * \param edge           position of the edge inside the CU, in units
+ * \param blockStrength  boundary strength per partition; units with 0 are skipped */
+void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
+{
+    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
+    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
+    intptr_t stride = reconPic->m_stride;
+    const PPS* pps = cuQ->m_slice->m_pps;
+
+    // offset: step across the edge; srcStep: step along the edge
+    intptr_t offset, srcStep;
+
+    // All-ones masks filter a side; they are only narrowed when bypass checking is on
+    int32_t maskP = -1;
+    int32_t maskQ = -1;
+    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
+    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
+    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
+
+    if (dir == EDGE_VER)
+    {
+        offset = 1;
+        srcStep = stride;
+        src += (edge << LOG2_UNIT_SIZE);
+    }
+    else // (dir == EDGE_HOR)
+    {
+        offset = stride;
+        srcStep = 1;
+        src += (edge << LOG2_UNIT_SIZE) * stride;
+    }
+
+    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
+    for (uint32_t idx = 0; idx < numUnits; idx++)
+    {
+        uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx);
+        uint32_t bs = blockStrength[partQ];
+
+        if (!bs)
+            continue;
+
+        // Derive neighboring PU index
+        uint32_t partP;
+        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
+
+        if (bCheckNoFilter)
+        {
+            // check if each of PUs is lossless coded
+            // (flag - 1) gives 0 for a set flag and -1 otherwise, presumably m_tqBypass holds 0/1 - confirm
+            maskP = cuP->m_tqBypass[partP] - 1;
+            maskQ = cuQ->m_tqBypass[partQ] - 1;
+            if (!(maskP | maskQ))
+                continue;
+        }
+
+        // Average QP of the two sides, rounded up
+        int32_t qpQ = cuQ->m_qp[partQ];
+        int32_t qpP = cuP->m_qp[partP];
+        int32_t qp  = (qpP + qpQ + 1) >> 1;
+
+        int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
+
+        // Table values are scaled up by the bit depth above 8
+        const int32_t bitdepthShift = X265_DEPTH - 8;
+        int32_t beta = s_betaTable[indexB] << bitdepthShift;
+
+        // Second differences on both sides, measured on the first and fourth line of the unit
+        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
+        int32_t dp0 = calcDP(src + unitOffset              , offset);
+        int32_t dq0 = calcDQ(src + unitOffset              , offset);
+        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
+        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
+        int32_t d0 = dp0 + dq0;
+        int32_t d3 = dp3 + dq3;
+
+        int32_t d =  d0 + d3;
+
+        // Too much local activity: leave this unit unfiltered
+        if (d >= beta)
+            continue;
+
+        // bs == 2 adds DEFAULT_INTRA_TC_OFFSET to the table index, bs == 1 adds nothing
+        int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
+        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+
+        // Strong filter only if both probed lines are flat enough
+        bool sw = (2 * d0 < (beta >> 2) &&
+                   2 * d3 < (beta >> 2) &&
+                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
+                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
+
+        if (sw)
+            pelFilterLumaStrong(src + unitOffset, srcStep, offset, tc, maskP, maskQ);
+        else
+        {
+            // Per-side decision whether the weak filter also adjusts the second sample
+            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
+            int32_t dp = dp0 + dp3;
+            int32_t dq = dq0 + dq3;
+            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
+            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
+
+            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
+        }
+    }
+}
+
+/* Filter one chroma edge of a CU in both chroma planes of the reconstructed picture.
+ * \param cuQ            CU on the current (Q) side of the edge
+ * \param absPartIdx     partition index of the CU within its CTU
+ * \param depth          CU depth, used to derive the number of units along the edge
+ * \param dir            EDGE_VER or EDGE_HOR
+ * \param edge           position of the edge inside the CU, in luma units
+ * \param blockStrength  boundary strength per partition; only units with a value above 1 are filtered */
+void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
+{
+    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
+    // offset: step across the edge; srcStep: step along the edge
+    intptr_t offset, srcStep;
+    const PPS* pps = cuQ->m_slice->m_pps;
+
+    // All-ones masks filter a side; they are only narrowed when bypass checking is on
+    int32_t maskP = -1;
+    int32_t maskQ = -1;
+    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
+
+    // The edge position, converted to chroma samples, must be a multiple of DEBLOCK_SMALLEST_BLOCK
+    X265_CHECK(((dir == EDGE_VER)
+                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
+                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
+               "invalid edge\n");
+
+    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
+    intptr_t stride = reconPic->m_strideC;
+    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
+    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
+
+    // chromaShift is the subsampling shift along the edge, the other shift places the edge itself
+    if (dir == EDGE_VER)
+    {
+        chromaShift = cuQ->m_vChromaShift;
+        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
+        offset     = 1;
+        srcStep    = stride;
+    }
+    else // (dir == EDGE_HOR)
+    {
+        chromaShift = cuQ->m_hChromaShift;
+        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
+        offset     = stride;
+        srcStep    = 1;
+    }
+
+    // The same offset addresses the edge in both chroma planes
+    pixel* srcChroma[2];
+    srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
+    srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
+
+    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
+    for (uint32_t idx = 0; idx < numUnits; idx++)
+    {
+        // Each chroma unit maps to luma unit (idx << chromaShift) along the edge
+        uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift);
+        uint32_t bs = blockStrength[partQ];
+
+        if (bs <= 1)
+            continue;
+
+        // Derive neighboring PU index
+        uint32_t partP;
+        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
+
+        if (bCheckNoFilter)
+        {
+            // check if each of PUs is lossless coded
+            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
+            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
+            if (!(maskP | maskQ))
+                continue;
+        }
+
+        // Average QP of the two sides, rounded up
+        int32_t qpQ = cuQ->m_qp[partQ];
+        int32_t qpP = cuP->m_qp[partP];
+        int32_t qpA = (qpP + qpQ + 1) >> 1;
+
+        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
+        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
+        {
+            // From 30 upwards the QP is remapped through g_chromaScale for 4:2:0, otherwise capped at QP_MAX_SPEC
+            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
+            if (qp >= 30)
+                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
+
+            int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
+            // Table values are scaled up by the bit depth above 8
+            const int32_t bitdepthShift = X265_DEPTH - 8;
+            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+            pixel* srcC = srcChroma[chromaIdx];
+
+            pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
+        }
+    }
+}
+
+/* tc lookup table. The edge filters index it with a value clipped to
+ * [0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET] and shift the entry left by
+ * (X265_DEPTH - 8) before use. */
+const uint8_t Deblock::s_tcTable[54] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
+    2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24
+};
+
+/* beta lookup table. The luma edge filter indexes it with a value clipped to
+ * [0, QP_MAX_SPEC] and shifts the entry left by (X265_DEPTH - 8) before use. */
+const uint8_t Deblock::s_betaTable[52] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+    18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
+};
+
--- a/x265/source/common/deblock.h
+++ b/x265/source/common/deblock.h
@ -0,0 +1,63 @@
+/*****************************************************************************
+* Copyright (C) 2013 x265 project
+*
+* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#ifndef X265_DEBLOCK_H
+#define X265_DEBLOCK_H
+
+#include "common.h"
+
+namespace X265_NS {
+// private namespace
+
+class CUData;
+struct CUGeom;
+
+/* In-loop deblocking filter. The class carries no per-instance data members; its
+ * only data are the two static lookup tables. */
+class Deblock
+{
+public:
+    // Edge direction passed as 'dir' to every method below (EDGE_VER == 0, EDGE_HOR == 1)
+    enum { EDGE_VER, EDGE_HOR };
+
+    // Entry point: deblock the edges of direction 'dir' within one CTU
+    void deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir);
+
+protected:
+
+    // CU-level deblocking function
+    void deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[]);
+
+    // set filtering functions: record candidate edges in blockStrength[]
+    // (transform unit edges are stored as 2, prediction unit edges as 1)
+    void setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[]);
+    void setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits);
+    void setEdgefilterMultiple(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits);
+
+    // get filtering functions: returns the boundary strength (0, 1 or 2) of one edge unit
+    uint8_t getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[]);
+
+    // filter luma/chroma functions: modify the reconstructed picture along one edge of a CU
+    void edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]);
+    void edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]);
+
+    // QP-indexed threshold tables; entries are scaled by the bit depth at the point of use
+    static const uint8_t s_tcTable[54];
+    static const uint8_t s_betaTable[52];
+};
+}
+#endif // ifndef X265_DEBLOCK_H
--- a/x265/source/common/frame.cpp
+++ b/x265/source/common/frame.cpp
@ -0,0 +1,129 @@
+/*****************************************************************************
+* Copyright (C) 2013 x265 project
+*
+* Author: Steve Borho <steve@borho.org>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#include "common.h"
+#include "frame.h"
+#include "picyuv.h"
+#include "framedata.h"
+
+using namespace X265_NS;
+
+/* Put a new Frame into a known empty state: flags cleared, every owned or linked
+ * pointer NULL and the lowres structure zero-filled. Nothing is allocated here;
+ * see create() and allocEncodeData(). */
+Frame::Frame()
+{
+    m_bChromaExtended = false;
+    m_lowresInit = false;
+    m_reconRowCount.set(0);
+    m_countRefEncoders = 0;
+    m_encData = NULL;
+    m_reconPic = NULL;
+    // destroy() tests m_fencPic before releasing it, so it must never be left
+    // indeterminate when create() has not been called
+    m_fencPic = NULL;
+    m_quantOffsets = NULL;
+    m_userData = NULL;
+    m_next = NULL;
+    m_prev = NULL;
+    m_param = NULL;
+    memset(&m_lowres, 0, sizeof(m_lowres));
+}
+
+/* Allocate the source picture and the lowres data for this frame.
+ * \param param         encoder parameters; the pointer is kept in m_param
+ * \param quantOffsets  when non-NULL, an m_quantOffsets array with one float per
+ *                      lowres block is allocated (its contents are not filled here)
+ * \return false if the source picture or the lowres data could not be created */
+bool Frame::create(x265_param *param, float* quantOffsets)
+{
+    m_fencPic = new PicYuv;
+    m_param = param;
+
+    if (!m_fencPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp))
+        return false;
+
+    if (!m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode))
+        return false;
+
+    if (quantOffsets)
+    {
+        int32_t blockCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
+        m_quantOffsets = new float[blockCount];
+    }
+
+    return true;
+}
+
+/* Allocate the FrameData and the reconstructed picture used while encoding this frame.
+ * \param param  encoder parameters (picture size and colour space)
+ * \param sps    sequence parameters supplying CTU counts and cached offset tables
+ * \return false if either allocation failed */
+bool Frame::allocEncodeData(x265_param *param, const SPS& sps)
+{
+    m_encData = new FrameData;
+    m_reconPic = new PicYuv;
+    m_encData->m_reconPic = m_reconPic;
+
+    if (!m_encData->create(*param, sps))
+        return false;
+    if (!m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp))
+        return false;
+
+    const bool hasChroma = m_reconPic->m_picCsp != X265_CSP_I400;
+
+    /* clear the recon planes up to the CTU-aligned height, including the right
+     * border, as SAO may read beyond the end of the picture and would otherwise
+     * access uninitialized pixels */
+    int paddedHeight = sps.numCuInHeight * g_maxCUSize;
+    memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) * m_reconPic->m_stride * paddedHeight);
+    if (hasChroma)
+    {
+        for (int plane = 1; plane <= 2; plane++)
+            memset(m_reconPic->m_picOrg[plane], 0, sizeof(pixel) * m_reconPic->m_strideC * (paddedHeight >> m_reconPic->m_vChromaShift));
+    }
+
+    /* share the cu/pu offset tables pre-calculated in the SPS structure */
+    m_reconPic->m_cuOffsetY = sps.cuOffsetY;
+    m_reconPic->m_buOffsetY = sps.buOffsetY;
+    if (hasChroma)
+    {
+        m_reconPic->m_cuOffsetC = sps.cuOffsetC;
+        m_reconPic->m_buOffsetC = sps.buOffsetC;
+    }
+
+    return true;
+}
+
+/* Get an attached FrameData instance ready to encode a new picture: the recon
+ * picture pointer is taken from m_encData, the chroma-extended flag is cleared
+ * and the FrameData statistics are reset */
+void Frame::reinit(const SPS& sps)
+{
+    m_reconPic = m_encData->m_reconPic;
+    m_bChromaExtended = false;
+    m_encData->reinit(sps);
+}
+
+/* Release everything owned by the frame: encode data, source picture, recon
+ * picture, quant offsets and lowres data. Each pointer is reset to NULL after it is
+ * released so that a repeated call does not free the same memory twice. */
+void Frame::destroy()
+{
+    if (m_encData)
+    {
+        m_encData->destroy();
+        delete m_encData;
+        m_encData = NULL;
+    }
+
+    if (m_fencPic)
+    {
+        m_fencPic->destroy();
+        delete m_fencPic;
+        m_fencPic = NULL;
+    }
+
+    if (m_reconPic)
+    {
+        m_reconPic->destroy();
+        delete m_reconPic;
+        m_reconPic = NULL;
+    }
+
+    if (m_quantOffsets)
+    {
+        delete[] m_quantOffsets;
+        m_quantOffsets = NULL; // was left dangling, unlike the other pointers above
+    }
+
+    m_lowres.destroy();
+}
--- a/x265/source/common/frame.h
+++ b/x265/source/common/frame.h
@ -0,0 +1,81 @@
+/*****************************************************************************
+* Copyright (C) 2013 x265 project
+*
+* Author: Steve Borho <steve@borho.org>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#ifndef X265_FRAME_H
+#define X265_FRAME_H
+
+#include "common.h"
+#include "lowres.h"
+#include "threading.h"
+
+namespace X265_NS {
+// private namespace
+
+class FrameData;
+class PicYuv;
+struct SPS;
+
+/* Evaluates to true when the lowres slice type of 'frame' is anything other than
+ * X265_TYPE_B. The argument is parenthesized so that any pointer-valued expression
+ * (e.g. a dereferenced iterator or a conditional) expands correctly. */
+#define IS_REFERENCED(frame) ((frame)->m_lowres.sliceType != X265_TYPE_B)
+
+/* One picture handled by the encoder: the source picture, its lowres data and,
+ * while it is being encoded, the attached FrameData and reconstructed picture. */
+class Frame
+{
+public:
+
+    /* These two items will be NULL until the Frame begins to be encoded, at which point
+     * it will be assigned a FrameData instance, which comes with a reconstructed image PicYuv */
+    FrameData*             m_encData;
+    PicYuv*                m_reconPic;
+
+    /* Data associated with x265_picture */
+    PicYuv*                m_fencPic;            // source picture, allocated by create()
+    int                    m_poc;
+    int64_t                m_pts;                // user provided presentation time stamp
+    int64_t                m_reorderedPts;
+    int64_t                m_dts;
+    int32_t                m_forceqp;            // Force to use the qp specified in qp file
+    void*                  m_userData;           // user provided pointer passed in with this picture
+
+    Lowres                 m_lowres;
+    bool                   m_lowresInit;         // lowres init complete (pre-analysis)
+    bool                   m_bChromaExtended;    // orig chroma planes motion extended for weight analysis
+
+    float*                 m_quantOffsets;       // one float per lowres block; allocated by create() when offsets are supplied, freed by destroy()
+
+    /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
+    ThreadSafeInteger      m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
+    volatile uint32_t      m_countRefEncoders;   // count of FrameEncoder threads monitoring m_reconRowCount
+
+    Frame*                 m_next;               // PicList doubly linked list pointers
+    Frame*                 m_prev;
+    x265_param*            m_param;              // Points to the latest param set for the frame.
+    x265_analysis_data     m_analysisData;
+    Frame();
+
+    // allocate the source picture and lowres data; returns false on failure
+    bool create(x265_param *param, float* quantOffsets);
+    // allocate m_encData and m_reconPic for encoding; returns false on failure
+    bool allocEncodeData(x265_param *param, const SPS& sps);
+    // prepare the attached FrameData for encoding a new picture
+    void reinit(const SPS& sps);
+    // release everything allocated by create() and allocEncodeData()
+    void destroy();
+};
+}
+
+#endif // ifndef X265_FRAME_H
--- a/x265/source/common/framedata.cpp
+++ b/x265/source/common/framedata.cpp
@ -0,0 +1,69 @@
+/*****************************************************************************
+* Copyright (C) 2013 x265 project
+*
+* Author: Steve Borho <steve@borho.org>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#include "framedata.h"
+#include "picyuv.h"
+
+using namespace X265_NS;
+
+/* Zero-fill the whole object so that every pointer starts out NULL and every
+ * counter at zero. NOTE(review): the memset also covers members of class type
+ * (m_frameStats, m_cuMemPool); this is only valid while they stay plain data. */
+FrameData::FrameData()
+{
+    memset(this, 0, sizeof(*this));
+}
+
+/* Allocate the slice, the per-CTU data and the rate control statistics.
+ * \param param  encoder parameters; the address is kept in m_param, so the object
+ *               must outlive this FrameData
+ * \param sps    supplies numCUsInFrame and numCuInHeight for sizing the arrays
+ * \return true on success; false when either statistics array could not be
+ *         allocated, in which case everything allocated so far is left for destroy() */
+bool FrameData::create(const x265_param& param, const SPS& sps)
+{
+    m_param = &param;
+    m_slice  = new Slice;
+    m_picCTU = new CUData[sps.numCUsInFrame];
+
+    // NOTE(review): the result of m_cuMemPool.create() is not checked here - confirm whether it can fail
+    m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);
+    for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
+        m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param.internalCsp, ctuAddr);
+
+    // CHECKED_MALLOC presumably jumps to the 'fail' label below when allocation fails - confirm against its definition
+    CHECKED_MALLOC(m_cuStat, RCStatCU, sps.numCUsInFrame);
+    CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
+    reinit(sps);
+    return true;
+
+fail:
+    return false;
+}
+
+/* Zero the per-CTU and per-row rate control statistics before a picture is encoded */
+void FrameData::reinit(const SPS& sps)
+{
+    const size_t rowStatBytes = sps.numCuInHeight * sizeof(RCStatRow);
+    const size_t cuStatBytes = sps.numCUsInFrame * sizeof(RCStatCU);
+
+    memset(m_rowStat, 0, rowStatBytes);
+    memset(m_cuStat, 0, cuStatBytes);
+}
+
+/* Release everything owned by this FrameData. m_reconPic is not released here.
+ * The pointers are not reset afterwards, so this must be called only once. */
+void FrameData::destroy()
+{
+    delete [] m_picCTU;
+    delete m_slice;
+    delete m_saoParam;   // not allocated by create(); NULL unless assigned elsewhere
+
+    // backing storage handed to the CUData entries in create()
+    m_cuMemPool.destroy();
+
+    X265_FREE(m_cuStat);
+    X265_FREE(m_rowStat);
+}
--- a/x265/source/common/framedata.h
+++ b/x265/source/common/framedata.h
@ -0,0 +1,152 @@
+/*****************************************************************************
+* Copyright (C) 2013 x265 project
+*
+* Author: Steve Borho <steve@borho.org>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#ifndef X265_FRAMEDATA_H
+#define X265_FRAMEDATA_H
+
+#include "common.h"
+#include "slice.h"
+#include "cudata.h"
+
+namespace X265_NS {
+// private namespace
+
+class PicYuv;
+class JobProvider;
+
+/* Second dimension of the per-depth mode distribution arrays in FrameStats */
+#define INTER_MODES 4 // 2Nx2N, 2NxN, Nx2N, AMP modes
+#define INTRA_MODES 3 // DC, Planar, Angular modes
+
+/* Current frame stats for 2 pass. All members are zeroed on construction. */
+struct FrameStats
+{
+    int         mvBits;    /* MV bits (MV+Ref+Block Type) */
+    int         coeffBits; /* Texture bits (DCT coefs) */
+    int         miscBits;
+
+    int         intra8x8Cnt;
+    int         inter8x8Cnt;
+    int         skip8x8Cnt;
+
+    /* CU type counts stored as percentage */
+    double      percent8x8Intra;
+    double      percent8x8Inter;
+    double      percent8x8Skip;
+    double      avgLumaDistortion;
+    double      avgChromaDistortion;
+    double      avgPsyEnergy;
+    double      avgResEnergy;
+    double      percentIntraNxN;
+    double      percentSkipCu[NUM_CU_DEPTH];
+    double      percentMergeCu[NUM_CU_DEPTH];
+    double      percentIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
+    double      percentInterDistribution[NUM_CU_DEPTH][3];           // 2Nx2N, RECT, AMP modes percentage
+
+    /* Raw totals and per-depth counters */
+    uint64_t    cntIntraNxN;
+    uint64_t    totalCu;
+    uint64_t    totalCtu;
+    uint64_t    lumaDistortion;
+    uint64_t    chromaDistortion;
+    uint64_t    psyEnergy;
+    uint64_t    resEnergy;
+    uint64_t    cntSkipCu[NUM_CU_DEPTH];
+    uint64_t    cntMergeCu[NUM_CU_DEPTH];
+    uint64_t    cntInter[NUM_CU_DEPTH];
+    uint64_t    cntIntra[NUM_CU_DEPTH];
+    uint64_t    cuInterDistribution[NUM_CU_DEPTH][INTER_MODES];
+    uint64_t    cuIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
+
+    // Zero every field; valid because the struct holds only arithmetic members
+    FrameStats()
+    {
+        memset(this, 0, sizeof(FrameStats));
+    }
+};
+
+/* Per-frame data that is used during encodes and referenced while the picture
+ * is available for reference. A FrameData instance is attached to a Frame as it
+ * comes out of the lookahead. Frames which are not being encoded do not have a
+ * FrameData instance. These instances are re-used once the encoded frame has
+ * no active references. They hold the Slice instance and the 'official' CTU
+ * data structures. They are maintained in a free-list pool, together with
+ * a reconstructed image PicYuv, in order to conserve memory. */
+class FrameData
+{
+public:
+
+    Slice*         m_slice;            /* allocated by create(), released by destroy() */
+    SAOParam*      m_saoParam;         /* released by destroy(); not allocated by create() */
+    const x265_param* m_param;         /* address of the param passed to create(); not owned */
+
+    FrameData*     m_freeListNext;
+    PicYuv*        m_reconPic;         /* not released by destroy() */
+    bool           m_bHasReferences;   /* used during DPB/RPS updates */
+    int            m_frameEncoderID;   /* the ID of the FrameEncoder encoding this frame */
+    JobProvider*   m_jobProvider;
+
+    CUDataMemPool  m_cuMemPool;        /* storage handed to each m_picCTU entry in create() */
+    CUData*        m_picCTU;           /* array of sps.numCUsInFrame CTUs */
+
+    /* Rate control data used during encode and by references */
+    struct RCStatCU
+    {
+        uint32_t totalBits;     /* total bits to encode this CTU */
+        uint32_t vbvCost;       /* sum of lowres costs for 16x16 sub-blocks */
+        uint32_t intraVbvCost;  /* sum of lowres intra costs for 16x16 sub-blocks */
+        uint64_t avgCost[4];    /* stores the avg cost of CU's in frame for each depth */
+        uint32_t count[4];      /* count and avgCost only used by Analysis at RD0..4 */
+        double   baseQp;        /* Qp of Cu set from RateControl/Vbv (only used by frame encoder) */
+    };
+
+    struct RCStatRow
+    {
+        uint32_t numEncodedCUs; /* ctuAddr of last encoded CTU in row */
+        uint32_t encodedBits;   /* sum of 'totalBits' of encoded CTUs */
+        uint32_t satdForVbv;    /* sum of lowres (estimated) costs for entire row */
+        uint32_t intraSatdForVbv; /* sum of lowres (estimated) intra costs for entire row */
+        uint32_t diagSatd;
+        uint32_t diagIntraSatd;
+        double   diagQp;
+        double   diagQpScale;
+        double   sumQpRc;
+        double   sumQpAq;
+    };
+
+    RCStatCU*      m_cuStat;     /* one entry per CTU, zeroed by reinit() */
+    RCStatRow*     m_rowStat;    /* one entry per CTU row, zeroed by reinit() */
+    FrameStats     m_frameStats; // stats of current frame for multi-pass encodes
+
+    double         m_avgQpRc;    /* avg QP as decided by rate-control */
+    double         m_avgQpAq;    /* avg QP as decided by AQ in addition to rate-control */
+    double         m_rateFactor; /* calculated based on the Frame QP */
+
+    FrameData();
+
+    /* allocate slice, CTU array and statistics; returns false on allocation failure */
+    bool create(const x265_param& param, const SPS& sps);
+    /* zero m_cuStat and m_rowStat */
+    void reinit(const SPS& sps);
+    /* release everything allocated by create() */
+    void destroy();
+
+    /* address of the CTU at ctuAddr; the index is not range checked */
+    inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
+};
+}
+
+#endif // ifndef X265_FRAMEDATA_H
--- a/x265/source/common/intrapred.cpp
+++ b/x265/source/common/intrapred.cpp
@ -0,0 +1,270 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Min Chen <chenm003@163.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "primitives.h"
+
+using namespace X265_NS;
+
+namespace {
+
+/* 1:2:1 smoothing of the intra reference samples.
+ *
+ * 'samples' holds 4 * tuSize + 1 values: index 0 is the top-left corner,
+ * indices 1 .. 2*tuSize are the top row and indices 2*tuSize+1 .. 4*tuSize
+ * are the left column.  'filtered' receives the same layout.  The last
+ * sample of the top row and the last sample of the left column are copied
+ * through unfiltered. */
+template<int tuSize>
+void intraFilter(const pixel* samples, pixel* filtered)
+{
+    const int topEnd    = 2 * tuSize;   // index of the last top sample
+    const int leftBegin = topEnd + 1;   // index of the first left sample
+    const int leftEnd   = 2 * topEnd;   // index of the last left sample
+
+    // Read the samples that are needed later before anything is written
+    const pixel corner   = samples[0];
+    const pixel lastTop  = samples[topEnd];
+    const pixel lastLeft = samples[leftEnd];
+
+    // top row
+    for (int pos = 1; pos < topEnd; pos++)
+        filtered[pos] = (samples[pos - 1] + 2 * samples[pos] + samples[pos + 1] + 2) >> 2;
+    filtered[topEnd] = lastTop;
+
+    // corner: its neighbours are the first top and the first left sample
+    filtered[0] = (samples[1] + 2 * corner + samples[leftBegin] + 2) >> 2;
+
+    // left column: the first left sample has the corner as its predecessor
+    filtered[leftBegin] = (corner + 2 * samples[leftBegin] + samples[leftBegin + 1] + 2) >> 2;
+    for (int pos = leftBegin + 1; pos < leftEnd; pos++)
+        filtered[pos] = (samples[pos - 1] + 2 * samples[pos] + samples[pos + 1] + 2) >> 2;
+    filtered[leftEnd] = lastLeft;
+}
+
+/* Blends the first row and first column of an already DC-filled block with
+ * the neighbouring reference samples.
+ *   above / left : reference samples adjacent to row 0 / column 0
+ *   dst          : size x size block, modified in place
+ *   dststride    : distance in pixels between two rows of dst */
+static void dcPredFilter(const pixel* above, const pixel* left, pixel* dst, intptr_t dststride, int size)
+{
+    // corner pixel mixes both neighbours
+    dst[0] = (pixel)((above[0] + left[0] + 2 * dst[0] + 2) >> 2);
+
+    // remainder of the first row
+    for (int col = 1; col < size; col++)
+        dst[col] = (pixel)((above[col] + 3 * dst[col] + 2) >> 2);
+
+    // remainder of the first column
+    for (int row = 1; row < size; row++)
+    {
+        pixel* edge = dst + row * dststride;
+        *edge = (pixel)((left[row] + 3 * *edge + 2) >> 2);
+    }
+}
+
+/* DC intra prediction: fills a width x width block with the rounded mean of
+ * the 'width' top and 'width' left reference samples, then optionally
+ * smooths the first row/column (bFilter != 0).
+ *   srcPix[1 ..]             : top reference samples
+ *   srcPix[2 * width + 1 ..] : left reference samples */
+template<int width>
+void intra_pred_dc_c(pixel* dst, intptr_t dstStride, const pixel* srcPix, int /*dirMode*/, int bFilter)
+{
+    const pixel* above = srcPix + 1;
+    const pixel* left  = srcPix + (2 * width + 1);
+
+    // start from 'width' so that the division below rounds to nearest
+    int sum = width;
+    for (int i = 0; i < width; i++)
+        sum += above[i] + left[i];
+
+    const pixel dc = (pixel)(sum / (2 * width));
+
+    pixel* row = dst;
+    for (int y = 0; y < width; y++, row += dstStride)
+        for (int x = 0; x < width; x++)
+            row[x] = dc;
+
+    if (bFilter)
+        dcPredFilter(above, left, dst, dstStride, width);
+}
+
+/* Planar intra prediction for a (1 << log2Size) square block.  Every pixel
+ * is the rounded sum of a horizontal interpolation (left[y] .. topRight) and
+ * a vertical interpolation (above[x] .. bottomLeft). */
+template<int log2Size>
+void planar_pred_c(pixel* dst, intptr_t dstStride, const pixel* srcPix, int /*dirMode*/, int /*bFilter*/)
+{
+    const int blkSize = 1 << log2Size;
+    const int last    = blkSize - 1;
+
+    const pixel* above = srcPix + 1;
+    const pixel* left  = srcPix + (2 * blkSize + 1);
+
+    // the sample just past the top row / left column anchors the far side
+    const pixel topRight   = above[blkSize];
+    const pixel bottomLeft = left[blkSize];
+
+    for (int y = 0; y < blkSize; y++)
+    {
+        pixel* row = dst + y * dstStride;
+        for (int x = 0; x < blkSize; x++)
+        {
+            const int hor = (last - x) * left[y]  + (x + 1) * topRight;
+            const int ver = (last - y) * above[x] + (y + 1) * bottomLeft;
+            row[x] = (pixel)((hor + ver + blkSize) >> (log2Size + 1));
+        }
+    }
+}
+
+/* Angular intra prediction of a width x width block.
+ *
+ *   dst, dstStride : output block and its row pitch in pixels
+ *   srcPix0        : reference samples; index 0 is the top-left sample,
+ *                    indices 1 .. 2*width are the top neighbours and indices
+ *                    2*width+1 .. 4*width are the left neighbours
+ *   dirMode        : prediction mode; modes below 18 are handled as
+ *                    "horizontal" by swapping the top and left neighbours,
+ *                    predicting as if vertical and transposing the result
+ *   bFilter        : when non-zero, the pure vertical/horizontal case
+ *                    (angle 0) additionally filters the first column
+ */
+template<int width>
+void intra_pred_ang_c(pixel* dst, intptr_t dstStride, const pixel *srcPix0, int dirMode, int bFilter)
+{
+    int width2 = width << 1;
+    // Flip the neighbours in the horizontal case.
+    int horMode = dirMode < 18;
+    // 4 * width + 1 entries are needed; 129 covers width == 32, the largest
+    // instantiation registered in setupIntraPrimitives_c.
+    pixel neighbourBuf[129];
+    const pixel *srcPix = srcPix0;
+
+    if (horMode)
+    {
+        // Keep the top-left sample and exchange the top and left segments.
+        neighbourBuf[0] = srcPix[0];
+        for (int i = 0; i < width << 1; i++)
+        {
+            neighbourBuf[1 + i] = srcPix[width2 + 1 + i];
+            neighbourBuf[width2 + 1 + i] = srcPix[1 + i];
+        }
+        srcPix = neighbourBuf;
+    }
+
+    // Intra prediction angle and inverse angle tables.
+    const int8_t angleTable[17] = { -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32 };
+    const int16_t invAngleTable[8] = { 4096, 1638, 910, 630, 482, 390, 315, 256 };
+
+    // Get the prediction angle.
+    // angleOffset is measured from mode 10 (horizontal group) or mode 26
+    // (vertical group); both of those modes map to angle 0.
+    int angleOffset = horMode ? 10 - dirMode : dirMode - 26;
+    int angle = angleTable[8 + angleOffset];
+
+    // Vertical Prediction.
+    if (!angle)
+    {
+        // Every row is a copy of the top neighbours.
+        for (int y = 0; y < width; y++)
+            for (int x = 0; x < width; x++)
+                dst[y * dstStride + x] = srcPix[1 + x];
+
+        if (bFilter)
+        {
+            // First column: first top sample plus half the gradient along
+            // the left neighbours, clipped to the pixel range.
+            int topLeft = srcPix[0], top = srcPix[1];
+            for (int y = 0; y < width; y++)
+                dst[y * dstStride] = x265_clip((int16_t)(top + ((srcPix[width2 + 1 + y] - topLeft) >> 1)));
+        }
+    }
+    else // Angular prediction.
+    {
+        // Get the reference pixels. The reference base is the first pixel to the top (neighbourBuf[1]).
+        // For width <= 32 at most 31 projected samples plus width + 1 copied
+        // samples are stored, which fits the 64 entries.
+        pixel refBuf[64];
+        const pixel *ref;
+
+        // Use the projected left neighbours and the top neighbours.
+        if (angle < 0)
+        {
+            // Number of neighbours projected.
+            int nbProjected = -((width * angle) >> 5) - 1;
+            // ref_pix[0] is the first top sample; negative indices hold the
+            // top-left sample (-1) and the projected left samples (-2 ...).
+            pixel *ref_pix = refBuf + nbProjected + 1;
+
+            // Project the neighbours.
+            // invAngle is in 1/256 units; invAngleSum starts at 128 for rounding.
+            int invAngle = invAngleTable[- angleOffset - 1];
+            int invAngleSum = 128;
+            for (int i = 0; i < nbProjected; i++)
+            {
+                invAngleSum += invAngle;
+                ref_pix[- 2 - i] = srcPix[width2 + (invAngleSum >> 8)];
+            }
+
+            // Copy the top-left and top pixels.
+            for (int i = 0; i < width + 1; i++)
+                ref_pix[-1 + i] = srcPix[i];
+            ref = ref_pix;
+        }
+        else // Use the top and top-right neighbours.
+            ref = srcPix + 1;
+
+        // Pass every row.
+        // angleSum is in 1/32 sample units: the integer part selects the
+        // reference sample, the fractional part weights its right neighbour.
+        int angleSum = 0;
+        for (int y = 0; y < width; y++)
+        {
+            angleSum += angle;
+            int offset = angleSum >> 5;
+            int fraction = angleSum & 31;
+
+            if (fraction) // Interpolate
+                for (int x = 0; x < width; x++)
+                    dst[y * dstStride + x] = (pixel)(((32 - fraction) * ref[offset + x] + fraction * ref[offset + x + 1] + 16) >> 5);
+            else // Copy.
+                for (int x = 0; x < width; x++)
+                    dst[y * dstStride + x] = ref[offset + x];
+        }
+    }
+
+    // Flip for horizontal.
+    if (horMode)
+    {
+        // In-place transpose: swap every element above the diagonal with
+        // its mirror below the diagonal.
+        for (int y = 0; y < width - 1; y++)
+        {
+            for (int x = y + 1; x < width; x++)
+            {
+                pixel tmp              = dst[y * dstStride + x];
+                dst[y * dstStride + x] = dst[x * dstStride + y];
+                dst[x * dstStride + y] = tmp;
+            }
+        }
+    }
+}
+
+/* Computes the angular predictions for modes 2..34 in one call.
+ *
+ *   dest    : receives 33 consecutive size x size blocks (row pitch == size),
+ *             block (mode - 2) belongs to 'mode'
+ *   refPix  : unfiltered reference samples
+ *   filtPix : filtered reference samples, chosen when g_intraFilterFlags[mode]
+ *             has the bit for this block size set
+ *   bLuma   : forwarded to intra_pred_ang_c as its bFilter argument
+ *
+ * intra_pred_ang_c transposes its output for modes below 18; that transpose
+ * is undone here, so the blocks of those modes are left un-flipped. */
+template<int log2Size>
+void all_angs_pred_c(pixel *dest, pixel *refPix, pixel *filtPix, int bLuma)
+{
+    const int size = 1 << log2Size;
+    const int blockArea = size * size;
+
+    for (int mode = 2; mode <= 34; mode++)
+    {
+        pixel *block = dest + (mode - 2) * blockArea;
+
+        pixel *ref = refPix;
+        if (g_intraFilterFlags[mode] & size)
+            ref = filtPix;
+
+        intra_pred_ang_c<size>(block, size, ref, mode, bLuma);
+
+        // only the horizontal modes need the transpose reverted
+        if (mode >= 18)
+            continue;
+
+        for (int r = 0; r < size - 1; r++)
+        {
+            for (int c = r + 1; c < size; c++)
+            {
+                const pixel upper   = block[r * size + c];
+                block[r * size + c] = block[c * size + r];
+                block[c * size + r] = upper;
+            }
+        }
+    }
+}
+}
+
+namespace X265_NS {
+// x265 private namespace
+
+/* Installs the C reference implementations of the intra primitives into 'p'
+ * for the four square block sizes 4x4, 8x8, 16x16 and 32x32. */
+void setupIntraPrimitives_c(EncoderPrimitives& p)
+{
+    // reference sample smoothing (template argument is the block width)
+    p.cu[BLOCK_4x4].intra_filter = intraFilter<4>;
+    p.cu[BLOCK_8x8].intra_filter = intraFilter<8>;
+    p.cu[BLOCK_16x16].intra_filter = intraFilter<16>;
+    p.cu[BLOCK_32x32].intra_filter = intraFilter<32>;
+
+    // planar prediction (template argument is log2 of the block width)
+    p.cu[BLOCK_4x4].intra_pred[PLANAR_IDX] = planar_pred_c<2>;
+    p.cu[BLOCK_8x8].intra_pred[PLANAR_IDX] = planar_pred_c<3>;
+    p.cu[BLOCK_16x16].intra_pred[PLANAR_IDX] = planar_pred_c<4>;
+    p.cu[BLOCK_32x32].intra_pred[PLANAR_IDX] = planar_pred_c<5>;
+
+    // DC prediction (template argument is the block width)
+    p.cu[BLOCK_4x4].intra_pred[DC_IDX] = intra_pred_dc_c<4>;
+    p.cu[BLOCK_8x8].intra_pred[DC_IDX] = intra_pred_dc_c<8>;
+    p.cu[BLOCK_16x16].intra_pred[DC_IDX] = intra_pred_dc_c<16>;
+    p.cu[BLOCK_32x32].intra_pred[DC_IDX] = intra_pred_dc_c<32>;
+
+    // every mode index from 2 upwards shares one angular function per size;
+    // the mode is passed to it at call time
+    for (int i = 2; i < NUM_INTRA_MODE; i++)
+    {
+        p.cu[BLOCK_4x4].intra_pred[i] = intra_pred_ang_c<4>;
+        p.cu[BLOCK_8x8].intra_pred[i] = intra_pred_ang_c<8>;
+        p.cu[BLOCK_16x16].intra_pred[i] = intra_pred_ang_c<16>;
+        p.cu[BLOCK_32x32].intra_pred[i] = intra_pred_ang_c<32>;
+    }
+
+    // all angular modes in one call (template argument is log2 of the width)
+    p.cu[BLOCK_4x4].intra_pred_allangs = all_angs_pred_c<2>;
+    p.cu[BLOCK_8x8].intra_pred_allangs = all_angs_pred_c<3>;
+    p.cu[BLOCK_16x16].intra_pred_allangs = all_angs_pred_c<4>;
+    p.cu[BLOCK_32x32].intra_pred_allangs = all_angs_pred_c<5>;
+}
+}
--- a/x265/source/common/ipfilter.cpp
+++ b/x265/source/common/ipfilter.cpp
@ -0,0 +1,520 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Deepthi Devaki <deepthidevaki@multicorewareinc.com>,
+ *          Rajesh Paulraj <rajesh@multicorewareinc.com>
+ *          Praveen Kumar Tiwari <praveen@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "primitives.h"
+#include "x265.h"
+
+using namespace X265_NS;
+
+#if _MSC_VER
+#pragma warning(disable: 4127) // conditional expression is constant, typical for templated functions
+#endif
+
+namespace {
+// file local namespace
+
+/* Converts a width x height block of pixels to the 16-bit intermediate
+ * representation: each sample is scaled up to IF_INTERNAL_PREC bits and has
+ * IF_INTERNAL_OFFS subtracted.  Strides are in elements. */
+template<int width, int height>
+void filterPixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride)
+{
+    const int shift = IF_INTERNAL_PREC - X265_DEPTH;
+
+    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+    {
+        for (int x = 0; x < width; x++)
+        {
+            const int16_t scaled = (int16_t)(src[x] << shift);
+            dst[x] = (int16_t)(scaled - (int16_t)IF_INTERNAL_OFFS);
+        }
+    }
+}
+
+/* Pads 'height' rows horizontally: the 'marginX' pixels to the left of each
+ * row are filled with the row's first pixel and the 'marginX' pixels to the
+ * right with its last pixel.
+ *   txt    : first pixel of the first row
+ *   stride : distance in pixels between two rows
+ *   width  : number of valid pixels per row */
+static void extendCURowColBorder(pixel* txt, intptr_t stride, int width, int height, int marginX)
+{
+    for (int row = 0; row < height; row++, txt += stride)
+    {
+        pixel* const leftPad  = txt - marginX;
+        pixel* const rightPad = txt + width;
+
+#if HIGH_BIT_DEPTH
+        // pixels are wider than one byte here, so fill element by element
+        for (int i = 0; i < marginX; i++)
+        {
+            leftPad[i]  = txt[0];
+            rightPad[i] = txt[width - 1];
+        }
+
+#else
+        memset(leftPad, txt[0], marginX);
+        memset(rightPad, txt[width - 1], marginX);
+#endif
+    }
+}
+
+/* Horizontal N-tap interpolation, pixel in / pixel out.
+ *
+ *   N        : tap count; 4 selects g_chromaFilter, anything else
+ *              g_lumaFilter (instantiated in this file with 4 and 8 only)
+ *   coeffIdx : row of the coefficient table
+ *
+ * The result is rounded, shifted down by IF_FILTER_PREC and clamped to
+ * [0, (1 << X265_DEPTH) - 1].  Strides are in elements. */
+template<int N, int width, int height>
+void interp_horiz_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* taps = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    const int shift = IF_FILTER_PREC;
+    const int rounding = 1 << (shift - 1);
+    const uint16_t maxVal = (1 << X265_DEPTH) - 1;
+
+    // centre the filter window on the output position
+    src -= N / 2 - 1;
+
+    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+    {
+        for (int x = 0; x < width; x++)
+        {
+            int sum = 0;
+            for (int t = 0; t < N; t++)
+                sum += src[x + t] * taps[t];
+
+            int16_t val = (int16_t)((sum + rounding) >> shift);
+            if (val < 0)
+                val = 0;
+            if (val > maxVal)
+                val = maxVal;
+            dst[x] = (pixel)val;
+        }
+    }
+}
+
+/* Horizontal N-tap interpolation, pixel in / 16-bit intermediate out.
+ *
+ *   N        : tap count; 4 selects g_chromaFilter, anything else g_lumaFilter
+ *   coeffIdx : row of the coefficient table
+ *   isRowExt : when non-zero, N / 2 - 1 extra rows above and N / 2 rows
+ *              below the block are filtered as well (N - 1 extra rows in
+ *              total), so that a following vertical pass has its context;
+ *              dst then starts with those extra top rows
+ *
+ * The output is shifted to IF_INTERNAL_PREC precision and has
+ * IF_INTERNAL_OFFS subtracted; no clamping is applied.  Strides are in
+ * elements. */
+template<int N, int width, int height>
+void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+{
+    const int16_t* coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    int shift = IF_FILTER_PREC - headRoom;
+    // Negate after shifting: left-shifting a negative value is undefined
+    // behaviour before C++20, the numeric result is the same.
+    int offset = -(IF_INTERNAL_OFFS << shift);
+    int blkheight = height;
+
+    // centre the filter window on the output position
+    src -= N / 2 - 1;
+
+    if (isRowExt)
+    {
+        src -= (N / 2 - 1) * srcStride;
+        blkheight += N - 1;
+    }
+
+    int row, col;
+    for (row = 0; row < blkheight; row++)
+    {
+        for (col = 0; col < width; col++)
+        {
+            int sum;
+
+            sum  = src[col + 0] * coeff[0];
+            sum += src[col + 1] * coeff[1];
+            sum += src[col + 2] * coeff[2];
+            sum += src[col + 3] * coeff[3];
+            if (N == 8)
+            {
+                sum += src[col + 4] * coeff[4];
+                sum += src[col + 5] * coeff[5];
+                sum += src[col + 6] * coeff[6];
+                sum += src[col + 7] * coeff[7];
+            }
+
+            int16_t val = (int16_t)((sum + offset) >> shift);
+            dst[col] = val;
+        }
+
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+/* Vertical N-tap interpolation, pixel in / pixel out.
+ *
+ *   N        : tap count; 4 selects g_chromaFilter, anything else
+ *              g_lumaFilter (instantiated in this file with 4 and 8 only)
+ *   coeffIdx : row of the coefficient table
+ *
+ * The result is rounded, shifted down by IF_FILTER_PREC and clamped to
+ * [0, (1 << X265_DEPTH) - 1].  Strides are in elements. */
+template<int N, int width, int height>
+void interp_vert_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* taps = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    const int shift = IF_FILTER_PREC;
+    const int rounding = 1 << (shift - 1);
+    const uint16_t maxVal = (1 << X265_DEPTH) - 1;
+
+    // start N / 2 - 1 rows above the output row
+    src -= (N / 2 - 1) * srcStride;
+
+    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+    {
+        for (int x = 0; x < width; x++)
+        {
+            int sum = 0;
+            for (int t = 0; t < N; t++)
+                sum += src[x + t * srcStride] * taps[t];
+
+            int16_t val = (int16_t)((sum + rounding) >> shift);
+            if (val < 0)
+                val = 0;
+            if (val > maxVal)
+                val = maxVal;
+
+            dst[x] = (pixel)val;
+        }
+    }
+}
+
+/* Vertical N-tap interpolation, pixel in / 16-bit intermediate out.
+ *
+ *   N        : tap count; 4 selects g_chromaFilter, anything else g_lumaFilter
+ *   coeffIdx : row of the coefficient table
+ *
+ * The output is shifted to IF_INTERNAL_PREC precision and has
+ * IF_INTERNAL_OFFS subtracted; no clamping is applied.  Strides are in
+ * elements. */
+template<int N, int width, int height>
+void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    int shift = IF_FILTER_PREC - headRoom;
+    // Negate after shifting: left-shifting a negative value is undefined
+    // behaviour before C++20, the numeric result is the same.
+    int offset = -(IF_INTERNAL_OFFS << shift);
+
+    // start N / 2 - 1 rows above the output row
+    src -= (N / 2 - 1) * srcStride;
+
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col++)
+        {
+            int sum;
+
+            sum  = src[col + 0 * srcStride] * c[0];
+            sum += src[col + 1 * srcStride] * c[1];
+            sum += src[col + 2 * srcStride] * c[2];
+            sum += src[col + 3 * srcStride] * c[3];
+            if (N == 8)
+            {
+                sum += src[col + 4 * srcStride] * c[4];
+                sum += src[col + 5 * srcStride] * c[5];
+                sum += src[col + 6 * srcStride] * c[6];
+                sum += src[col + 7 * srcStride] * c[7];
+            }
+
+            int16_t val = (int16_t)((sum + offset) >> shift);
+            dst[col] = val;
+        }
+
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+/* Vertical N-tap interpolation, 16-bit intermediate in / pixel out.
+ *
+ *   N        : tap count; 8 selects g_lumaFilter, anything else
+ *              g_chromaFilter (instantiated in this file with 4 and 8 only)
+ *   coeffIdx : row of the coefficient table
+ *
+ * The rounding term also adds back IF_INTERNAL_OFFS (scaled by the filter
+ * gain) before the shift down to pixel precision; the result is clamped to
+ * [0, (1 << X265_DEPTH) - 1].  Strides are in elements. */
+template<int N, int width, int height>
+void interp_vert_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    const int shift = IF_FILTER_PREC + headRoom;
+    const int rounding = (1 << (shift - 1)) + (IF_INTERNAL_OFFS << IF_FILTER_PREC);
+    const uint16_t maxVal = (1 << X265_DEPTH) - 1;
+    const int16_t* taps = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
+
+    // start N / 2 - 1 rows above the output row
+    src -= (N / 2 - 1) * srcStride;
+
+    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+    {
+        for (int x = 0; x < width; x++)
+        {
+            int sum = 0;
+            for (int t = 0; t < N; t++)
+                sum += src[x + t * srcStride] * taps[t];
+
+            int16_t val = (int16_t)((sum + rounding) >> shift);
+            if (val < 0)
+                val = 0;
+            if (val > maxVal)
+                val = maxVal;
+
+            dst[x] = (pixel)val;
+        }
+    }
+}
+
+/* Vertical N-tap interpolation, 16-bit intermediate in / 16-bit out.
+ *
+ *   N        : tap count; 8 selects g_lumaFilter, anything else
+ *              g_chromaFilter (instantiated in this file with 4 and 8 only)
+ *   coeffIdx : row of the coefficient table
+ *
+ * The filtered sum is shifted down by IF_FILTER_PREC without rounding or
+ * clamping.  Strides are in elements. */
+template<int N, int width, int height>
+void interp_vert_ss_c(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* taps = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
+    const int shift = IF_FILTER_PREC;
+
+    // start N / 2 - 1 rows above the output row
+    src -= (N / 2 - 1) * srcStride;
+
+    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+    {
+        for (int x = 0; x < width; x++)
+        {
+            int sum = 0;
+            for (int t = 0; t < N; t++)
+                sum += src[x + t * srcStride] * taps[t];
+
+            dst[x] = (int16_t)(sum >> shift);
+        }
+    }
+}
+
+/* Vertical N-tap interpolation, 16-bit intermediate in / pixel out, with the
+ * block dimensions given at run time.
+ *
+ *   N        : tap count; 8 selects g_lumaFilter, anything else
+ *              g_chromaFilter (used in this file with N == 8 via
+ *              interp_hv_pp_c)
+ *   coeffIdx : row of the coefficient table
+ *
+ * The rounding term also adds back IF_INTERNAL_OFFS (scaled by the filter
+ * gain) before the shift down to pixel precision; the result is clamped to
+ * [0, (1 << X265_DEPTH) - 1].  Strides are in elements. */
+template<int N>
+void filterVertical_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int coeffIdx)
+{
+    const int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    const int shift = IF_FILTER_PREC + headRoom;
+    const int rounding = (1 << (shift - 1)) + (IF_INTERNAL_OFFS << IF_FILTER_PREC);
+    const uint16_t maxVal = (1 << X265_DEPTH) - 1;
+    const int16_t* taps = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
+
+    // start N / 2 - 1 rows above the output row
+    src -= (N / 2 - 1) * srcStride;
+
+    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+    {
+        for (int x = 0; x < width; x++)
+        {
+            int sum = 0;
+            for (int t = 0; t < N; t++)
+                sum += src[x + t * srcStride] * taps[t];
+
+            int16_t val = (int16_t)((sum + rounding) >> shift);
+            if (val < 0)
+                val = 0;
+            if (val > maxVal)
+                val = maxVal;
+
+            dst[x] = (pixel)val;
+        }
+    }
+}
+
+/* Separable 2D interpolation, pixel in / pixel out: a horizontal pass into a
+ * 16-bit intermediate buffer followed by a vertical pass.
+ *
+ *   N          : tap count, forwarded to both passes
+ *   idxX, idxY : coefficient table rows for the horizontal / vertical pass
+ *
+ * The horizontal pass runs with isRowExt = 1 and therefore emits
+ * height + N - 1 rows of 'width' samples, the first N / 2 - 1 of which lie
+ * above the block.  The vertical pass steps back by N / 2 - 1 rows itself,
+ * so it is handed the row that corresponds to the first output row. */
+template<int N, int width, int height>
+void interp_hv_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
+{
+    // sized for the largest block (64x64) plus the extra filter rows
+    int16_t immedVals[(64 + 8) * (64 + 8)];
+
+    interp_horiz_ps_c<N, width, height>(src, srcStride, immedVals, width, idxX, 1);
+    // skip the N / 2 - 1 context rows (3 for the 8-tap filter) instead of a
+    // hard-coded 3, so the function stays correct for any tap count
+    filterVertical_sp_c<N>(immedVals + (N / 2 - 1) * width, width, dst, dstStride, width, height, idxY);
+}
+}
+
+namespace X265_NS {
+// x265 private namespace
+
+// Registers the 4-tap C interpolation filters and the pixel-to-short
+// conversion for the W x H partition of the X265_CSP_I420 chroma table.
+#define CHROMA_420(W, H) \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>;  \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].p2s = filterPixelToShort_c<W, H>;
+
+// Same as CHROMA_420, for the X265_CSP_I422 chroma table.
+#define CHROMA_422(W, H) \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>;  \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].p2s = filterPixelToShort_c<W, H>;
+
+// Same as CHROMA_420, for the X265_CSP_I444 chroma table. Note that the
+// entries are indexed with the LUMA_WxH partition ids, not a CHROMA_444_ id.
+#define CHROMA_444(W, H) \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>;  \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>;  \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].p2s = filterPixelToShort_c<W, H>;
+
+// Registers the 8-tap C interpolation filters, the combined
+// horizontal+vertical filter and the pixel-to-short conversion for the
+// W x H luma partition.
+#define LUMA(W, H) \
+    p.pu[LUMA_ ## W ## x ## H].luma_hpp     = interp_horiz_pp_c<8, W, H>; \
+    p.pu[LUMA_ ## W ## x ## H].luma_hps     = interp_horiz_ps_c<8, W, H>; \
+    p.pu[LUMA_ ## W ## x ## H].luma_vpp     = interp_vert_pp_c<8, W, H>;  \
+    p.pu[LUMA_ ## W ## x ## H].luma_vps     = interp_vert_ps_c<8, W, H>;  \
+    p.pu[LUMA_ ## W ## x ## H].luma_vsp     = interp_vert_sp_c<8, W, H>;  \
+    p.pu[LUMA_ ## W ## x ## H].luma_vss     = interp_vert_ss_c<8, W, H>;  \
+    p.pu[LUMA_ ## W ## x ## H].luma_hvpp    = interp_hv_pp_c<8, W, H>; \
+    p.pu[LUMA_ ## W ## x ## H].convert_p2s = filterPixelToShort_c<W, H>;
+
+/* Installs the C reference interpolation filters into 'p' for every
+ * partition size listed below, using the LUMA / CHROMA_420 / CHROMA_422 /
+ * CHROMA_444 registration macros, and sets the row border extension
+ * primitive. */
+void setupFilterPrimitives_c(EncoderPrimitives& p)
+{
+    // Luma partitions; each one except LUMA(4, 4) is followed by the 4:2:0
+    // chroma partition of half its width and height.
+    LUMA(4, 4);
+    LUMA(8, 8);
+    CHROMA_420(4,  4);
+    LUMA(4, 8);
+    CHROMA_420(2,  4);
+    LUMA(8, 4);
+    CHROMA_420(4,  2);
+    LUMA(16, 16);
+    CHROMA_420(8,  8);
+    LUMA(16,  8);
+    CHROMA_420(8,  4);
+    LUMA(8, 16);
+    CHROMA_420(4,  8);
+    LUMA(16, 12);
+    CHROMA_420(8,  6);
+    LUMA(12, 16);
+    CHROMA_420(6,  8);
+    LUMA(16,  4);
+    CHROMA_420(8,  2);
+    LUMA(4, 16);
+    CHROMA_420(2,  8);
+    LUMA(32, 32);
+    CHROMA_420(16, 16);
+    LUMA(32, 16);
+    CHROMA_420(16, 8);
+    LUMA(16, 32);
+    CHROMA_420(8,  16);
+    LUMA(32, 24);
+    CHROMA_420(16, 12);
+    LUMA(24, 32);
+    CHROMA_420(12, 16);
+    LUMA(32,  8);
+    CHROMA_420(16, 4);
+    LUMA(8, 32);
+    CHROMA_420(4,  16);
+    LUMA(64, 64);
+    CHROMA_420(32, 32);
+    LUMA(64, 32);
+    CHROMA_420(32, 16);
+    LUMA(32, 64);
+    CHROMA_420(16, 32);
+    LUMA(64, 48);
+    CHROMA_420(32, 24);
+    LUMA(48, 64);
+    CHROMA_420(24, 32);
+    LUMA(64, 16);
+    CHROMA_420(32, 8);
+    LUMA(16, 64);
+    CHROMA_420(8,  32);
+
+    // 4:2:2 chroma partitions (half the luma width, full luma height)
+    CHROMA_422(4, 8);
+    CHROMA_422(4, 4);
+    CHROMA_422(2, 4);
+    CHROMA_422(2, 8);
+    CHROMA_422(8,  16);
+    CHROMA_422(8,  8);
+    CHROMA_422(4,  16);
+    CHROMA_422(8,  12);
+    CHROMA_422(6,  16);
+    CHROMA_422(8,  4);
+    CHROMA_422(2,  16);
+    CHROMA_422(16, 32);
+    CHROMA_422(16, 16);
+    CHROMA_422(8,  32);
+    CHROMA_422(16, 24);
+    CHROMA_422(12, 32);
+    CHROMA_422(16, 8);
+    CHROMA_422(4,  32);
+    CHROMA_422(32, 64);
+    CHROMA_422(32, 32);
+    CHROMA_422(16, 64);
+    CHROMA_422(32, 48);
+    CHROMA_422(24, 64);
+    CHROMA_422(32, 16);
+    CHROMA_422(8,  64);
+
+    // 4:4:4 chroma partitions (same dimensions as the luma partitions)
+    CHROMA_444(4,  4);
+    CHROMA_444(8,  8);
+    CHROMA_444(4,  8);
+    CHROMA_444(8,  4);
+    CHROMA_444(16, 16);
+    CHROMA_444(16, 8);
+    CHROMA_444(8,  16);
+    CHROMA_444(16, 12);
+    CHROMA_444(12, 16);
+    CHROMA_444(16, 4);
+    CHROMA_444(4,  16);
+    CHROMA_444(32, 32);
+    CHROMA_444(32, 16);
+    CHROMA_444(16, 32);
+    CHROMA_444(32, 24);
+    CHROMA_444(24, 32);
+    CHROMA_444(32, 8);
+    CHROMA_444(8,  32);
+    CHROMA_444(64, 64);
+    CHROMA_444(64, 32);
+    CHROMA_444(32, 64);
+    CHROMA_444(64, 48);
+    CHROMA_444(48, 64);
+    CHROMA_444(64, 16);
+    CHROMA_444(16, 64);
+
+    p.extendRowBorder = extendCURowColBorder;
+}
+}
--- a/x265/source/common/loopfilter.cpp
+++ b/x265/source/common/loopfilter.cpp
@ -0,0 +1,154 @@
+/*****************************************************************************
+* Copyright (C) 2013 x265 project
+*
+* Authors: Praveen Kumar Tiwari <praveen@multicorewareinc.com>
+*          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+#include "common.h"
+#include "primitives.h"
+
+#define PIXEL_MIN 0
+#define PIXEL_MAX ((1 << X265_DEPTH) - 1)
+
+namespace {
+
+/* get the sign of input variable (TODO: this is a dup, make common)
+ *
+ * Returns -1 for negative x, 0 for zero and +1 for positive x.  Written with
+ * comparisons so that it neither negates x (overflow for INT_MIN) nor
+ * right-shifts a negative value (implementation-defined). */
+inline int8_t signOf(int x)
+{
+    return (int8_t)((x > 0) - (x < 0));
+}
+
+/* Writes sign(src1[i] - src2[i]) (-1, 0 or +1) to dst[i] for i in [0, endX) */
+static void calSign(int8_t *dst, const pixel *src1, const pixel *src2, const int endX)
+{
+    for (int i = 0; i < endX; i++)
+    {
+        const int diff = src1[i] - src2[i];
+        dst[i] = signOf(diff);
+    }
+}
+
+/* SAO primitive "E0": processes two rows of 'width' pixels in place.  Each
+ * pixel is classified by comparing it with its left and right neighbours in
+ * the same row, and offsetEo[class] is added with clipping.
+ *   signLeft[y] : sign of (first pixel - its left neighbour) for row y
+ *   stride      : distance in pixels between the two rows */
+static void processSaoCUE0(pixel * rec, int8_t * offsetEo, int width, int8_t* signLeft, intptr_t stride)
+{
+    for (int row = 0; row < 2; row++, rec += stride)
+    {
+        int8_t leftSign = signLeft[row];
+
+        for (int col = 0; col < width; col++)
+        {
+            // compare with the (still unmodified) right neighbour
+            const int diff = rec[col] - rec[col + 1];
+            int8_t rightSign;
+            if (diff < 0)
+                rightSign = -1;
+            else if (diff > 0)
+                rightSign = 1;
+            else
+                rightSign = 0;
+
+            const int8_t edgeType = rightSign + leftSign + 2;
+
+            // seen from the next pixel, this comparison has the opposite sign
+            leftSign = -rightSign;
+            rec[col] = x265_clip(rec[col] + offsetEo[edgeType]);
+        }
+    }
+}
+
+/* SAO primitive "E1": processes one row of 'width' pixels in place.  Each
+ * pixel is classified from upBuff1[x] (the sign carried over from the row
+ * above) and the sign of its difference to the pixel one row below;
+ * offsetEo[class] is added with clipping.  upBuff1 is updated so that it can
+ * be used for the next row. */
+static void processSaoCUE1(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
+{
+    const pixel* below = rec + stride;
+
+    for (int col = 0; col < width; col++)
+    {
+        const int8_t downSign = signOf(rec[col] - below[col]);
+        const int edgeType = downSign + upBuff1[col] + 2;
+
+        upBuff1[col] = -downSign;
+        rec[col] = x265_clip(rec[col] + offsetEo[edgeType]);
+    }
+}
+
+/* Two-row variant of the SAO primitive "E1": applies the same per-row
+ * processing (vertical neighbour comparison, offsetEo lookup, clipping) to
+ * two consecutive rows, carrying upBuff1 from the first row to the second. */
+static void processSaoCUE1_2Rows(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
+{
+    for (int row = 0; row < 2; row++, rec += stride)
+    {
+        const pixel* below = rec + stride;
+
+        for (int col = 0; col < width; col++)
+        {
+            const int8_t downSign = signOf(rec[col] - below[col]);
+            const int edgeType = downSign + upBuff1[col] + 2;
+
+            upBuff1[col] = -downSign;
+            rec[col] = x265_clip(rec[col] + offsetEo[edgeType]);
+        }
+    }
+}
+
+/* SAO primitive "E2": processes one row of 'width' pixels in place.  Each
+ * pixel is classified from buff1[x] and the sign of its difference to the
+ * pixel one row below and one column to the right; offsetEo[class] is added
+ * with clipping.  The negated sign is stored at bufft[x + 1]. */
+static void processSaoCUE2(pixel * rec, int8_t * bufft, int8_t * buff1, int8_t * offsetEo, int width, intptr_t stride)
+{
+    const pixel* belowRight = rec + stride + 1;
+
+    for (int col = 0; col < width; col++)
+    {
+        const int8_t downSign = signOf(rec[col] - belowRight[col]);
+        const int edgeType = downSign + buff1[col] + 2;
+
+        bufft[col + 1] = -downSign;
+        rec[col] = x265_clip(rec[col] + offsetEo[edgeType]);
+    }
+}
+
+/* SAO primitive "E3": processes the pixels with startX < x < endX of one
+ * row in place.  Each pixel is classified from upBuff1[x] and the sign of
+ * its difference to rec[x + stride]; offsetEo[class] is added with clipping.
+ * The negated sign is stored one position to the left, at upBuff1[x - 1]. */
+static void processSaoCUE3(pixel *rec, int8_t *upBuff1, int8_t *offsetEo, intptr_t stride, int startX, int endX)
+{
+    const pixel* other = rec + stride;
+
+    for (int col = startX + 1; col < endX; col++)
+    {
+        const int8_t downSign = signOf(rec[col] - other[col]);
+        // upBuff1[col] is read before upBuff1[col - 1] is overwritten
+        const int8_t edgeType = downSign + upBuff1[col] + 2;
+
+        upBuff1[col - 1] = -downSign;
+        rec[col] = x265_clip(rec[col] + offsetEo[edgeType]);
+    }
+}
+
+/* SAO primitive "B0": processes a ctuWidth x ctuHeight block in place.  The
+ * top 5 bits of each pixel value select an entry of 'offset', which is added
+ * to the pixel with clipping.
+ *   offset : table indexed by (pixel >> (X265_DEPTH - 5))
+ *   stride : distance in pixels between two rows of rec */
+static void processSaoCUB0(pixel* rec, const int8_t* offset, int ctuWidth, int ctuHeight, intptr_t stride)
+{
+    // scoped constant instead of a function-local #define, which would stay
+    // defined for the remainder of the translation unit
+    const int saoBoBits = 5;
+    const int boShift = X265_DEPTH - saoBoBits;
+    int x, y;
+    for (y = 0; y < ctuHeight; y++)
+    {
+        for (x = 0; x < ctuWidth; x++)
+        {
+            rec[x] = x265_clip(rec[x] + offset[rec[x] >> boShift]);
+        }
+        rec += stride;
+    }
+}
+}
+
+namespace X265_NS {
+/* Installs the C reference implementations of the SAO primitives into 'p' */
+void setupLoopFilterPrimitives_c(EncoderPrimitives &p)
+{
+    // sign helper
+    p.sign = calSign;
+
+    // B0 primitive
+    p.saoCuOrgB0 = processSaoCUB0;
+
+    // E0 / E1 primitives
+    p.saoCuOrgE0 = processSaoCUE0;
+    p.saoCuOrgE1 = processSaoCUE1;
+    p.saoCuOrgE1_2Rows = processSaoCUE1_2Rows;
+
+    // both slots of the E2 and E3 tables share the same C function
+    for (int slot = 0; slot < 2; slot++)
+    {
+        p.saoCuOrgE2[slot] = processSaoCUE2;
+        p.saoCuOrgE3[slot] = processSaoCUE3;
+    }
+}
+}
--- a/x265/source/common/lowres.cpp
+++ b/x265/source/common/lowres.cpp
@ -0,0 +1,165 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Gopu Govindaswamy <gopu@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "picyuv.h"
+#include "lowres.h"
+#include "mv.h"
+
+using namespace X265_NS;
+
+/* Compute the lowres geometry from origPic and allocate every buffer owned
+ * by this Lowres.
+ *
+ * origPic    - source picture; its width, height and luma margins are read
+ * _bframes   - stored in `bframes`; sizes the [bframes + 2] / [bframes + 1]
+ *              per-distance arrays allocated below
+ * bAQEnabled - when true the three AQ / cuTree offset arrays are allocated
+ *
+ * Returns true when all allocations succeeded, false otherwise. The `fail`
+ * path only returns false; nothing is released here, so buffers that were
+ * already allocated stay owned by the object.
+ * NOTE(review): CHECKED_MALLOC / CHECKED_MALLOC_ZERO are defined elsewhere;
+ * presumably they jump to `fail` when an allocation fails - confirm. */
+bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled)
+{
+    isLowres = true;
+    bframes = _bframes;
+    /* the lowres picture is half the source size in each dimension */
+    width = origPic->m_picWidth / 2;
+    lines = origPic->m_picHeight / 2;
+    /* stride = half width plus a margin on both sides, rounded up to a
+     * multiple of 32; note it is derived from the width before the CU
+     * rounding applied further down */
+    lumaStride = width + 2 * origPic->m_lumaMarginX;
+    if (lumaStride & 31)
+        lumaStride += 32 - (lumaStride & 31);
+    /* number of lowres CUs per row / column, rounding partial CUs up */
+    maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+    maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+    int cuCount = maxBlocksInRow * maxBlocksInCol;
+
+    /* rounding the width to multiple of lowres CU size */
+    width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
+    lines = maxBlocksInCol * X265_LOWRES_CU_SIZE;
+
+    /* one plane = stride x (lines + top and bottom margin); padoffset is the
+     * distance from the start of a plane buffer to its first visible pixel */
+    size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY);
+    size_t padoffset = lumaStride * origPic->m_lumaMarginY + origPic->m_lumaMarginX;
+
+    if (bAQEnabled)
+    {
+        CHECKED_MALLOC(qpAqOffset, double, cuCount);
+        CHECKED_MALLOC(invQscaleFactor, int, cuCount);
+        CHECKED_MALLOC(qpCuTreeOffset, double, cuCount);
+    }
+    CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
+
+    /* allocate lowres buffers */
+    /* a single allocation holds all four planes back to back; only
+     * buffer[0] is an allocation base, buffer[1..3] point into it */
+    CHECKED_MALLOC_ZERO(buffer[0], pixel, 4 * planesize);
+
+    buffer[1] = buffer[0] + planesize;
+    buffer[2] = buffer[1] + planesize;
+    buffer[3] = buffer[2] + planesize;
+
+    lowresPlane[0] = buffer[0] + padoffset;
+    lowresPlane[1] = buffer[1] + padoffset;
+    lowresPlane[2] = buffer[2] + padoffset;
+    lowresPlane[3] = buffer[3] + padoffset;
+
+    CHECKED_MALLOC(intraCost, int32_t, cuCount);
+    CHECKED_MALLOC(intraMode, uint8_t, cuCount);
+
+    /* (bframes + 2) x (bframes + 2) tables: one row-SATD array and one
+     * per-CU cost array for every index pair */
+    for (int i = 0; i < bframes + 2; i++)
+    {
+        for (int j = 0; j < bframes + 2; j++)
+        {
+            CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInCol);
+            CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCount);
+        }
+    }
+
+    /* per-CU motion vectors and MV costs, two lists of bframes + 1 entries */
+    for (int i = 0; i < bframes + 1; i++)
+    {
+        CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
+        CHECKED_MALLOC(lowresMvs[1][i], MV, cuCount);
+        CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCount);
+        CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCount);
+    }
+
+    return true;
+
+fail:
+    return false;
+}
+
+/* Release every buffer handed out by create(). Only buffer[0] is freed for
+ * the planes, since buffer[1..3] point into the same allocation. */
+void Lowres::destroy()
+{
+    const int costDim = bframes + 2;
+    const int mvDim = bframes + 1;
+
+    X265_FREE(buffer[0]);
+    X265_FREE(intraCost);
+    X265_FREE(intraMode);
+
+    /* per index-pair cost tables */
+    for (int row = 0; row < costDim; row++)
+    {
+        for (int col = 0; col < costDim; col++)
+        {
+            X265_FREE(rowSatds[row][col]);
+            X265_FREE(lowresCosts[row][col]);
+        }
+    }
+
+    /* motion vectors and their costs, both lists */
+    for (int idx = 0; idx < mvDim; idx++)
+    {
+        X265_FREE(lowresMvs[0][idx]);
+        X265_FREE(lowresMvs[1][idx]);
+        X265_FREE(lowresMvCosts[0][idx]);
+        X265_FREE(lowresMvCosts[1][idx]);
+    }
+
+    /* AQ / cuTree arrays */
+    X265_FREE(qpAqOffset);
+    X265_FREE(invQscaleFactor);
+    X265_FREE(qpCuTreeOffset);
+    X265_FREE(propagateCost);
+}
+
+// (re) initialize lowres state
+// Resets the per-frame flags and cost tables for the frame numbered `poc`,
+// then rebuilds the four lowres planes from origPic's first plane
+// (m_picOrg[0]) and pads their borders. The buffers themselves must already
+// have been allocated (they are dereferenced here, not allocated).
+void Lowres::init(PicYuv *origPic, int poc)
+{
+    bLastMiniGopBFrame = false;
+    // NOTE(review): the comment that used to sit here read "could be a scene-cut,
+    // until ruled out by flash detection", which contradicts the `false`
+    // initialiser - confirm which of the two is intended.
+    bScenecut = false;
+    bKeyframe = false; // Not a keyframe unless identified by lookahead
+    frameNum = poc;
+    leadingBframes = 0;
+    indB = 0;
+    satdCost = (int64_t)-1;
+    // all-ones bytes, i.e. every int64_t entry becomes -1
+    memset(costEst, -1, sizeof(costEst));
+    memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
+
+    // the AQ cost table is only reset when the AQ arrays were allocated
+    if (qpAqOffset && invQscaleFactor)
+        memset(costEstAq, -1, sizeof(costEstAq));
+
+    // only element [0] of each array is written; presumably a "not yet
+    // computed" marker checked by the consumers - TODO confirm
+    for (int y = 0; y < bframes + 2; y++)
+        for (int x = 0; x < bframes + 2; x++)
+            rowSatds[y][x][0] = -1;
+
+    // likewise only the first MV of each list is tagged, with x = 0x7FFF
+    for (int i = 0; i < bframes + 1; i++)
+    {
+        lowresMvs[0][i][0].x = 0x7FFF;
+        lowresMvs[1][i][0].x = 0x7FFF;
+    }
+
+    for (int i = 0; i < bframes + 2; i++)
+        intraMbs[i] = 0;
+
+    /* downscale and generate 4 hpel planes for lookahead */
+    primitives.frameInitLowres(origPic->m_picOrg[0],
+                               lowresPlane[0], lowresPlane[1], lowresPlane[2], lowresPlane[3],
+                               origPic->m_stride, lumaStride, width, lines);
+
+    /* extend hpel planes for motion search */
+    extendPicBorder(lowresPlane[0], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
+    extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
+    extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
+    extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
+    // the full-pel plane pointer aliases the first lowres plane
+    fpelPlane[0] = lowresPlane[0];
+}
--- a/x265/source/common/lowres.h
+++ b/x265/source/common/lowres.h
@ -0,0 +1,159 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Gopu Govindaswamy <gopu@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_LOWRES_H
+#define X265_LOWRES_H
+
+#include "primitives.h"
+#include "common.h"
+#include "picyuv.h"
+#include "mv.h"
+
+namespace X265_NS {
+// private namespace
+
+/* Plane pointers, strides and weighting parameters for one reference
+ * picture, plus helpers that fetch sub-pel positions from the four lowres
+ * planes. */
+struct ReferencePlanes
+{
+    // Zero-fills sizeof(ReferencePlanes) bytes only; members added by a
+    // derived struct are not covered by this memset.
+    ReferencePlanes() { memset(this, 0, sizeof(ReferencePlanes)); }
+
+    pixel*   fpelPlane[3];
+    pixel*   lowresPlane[4];   // indexed by (qmv.y & 2) | ((qmv.x & 2) >> 1), see below
+    PicYuv*  reconPic;
+
+    bool     isWeighted;
+    bool     isLowres;
+
+    intptr_t lumaStride;
+    intptr_t chromaStride;
+
+    struct {
+        int      weight;
+        int      offset;
+        int      shift;
+        int      round;
+    } w[3];
+
+    // Address inside fpelPlane[0..2] for a CTU / partition pair, using the
+    // offset tables held by reconPic (which must therefore be non-NULL).
+    pixel* getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx) { return fpelPlane[0] + reconPic->m_cuOffsetY[ctuAddr] + reconPic->m_buOffsetY[absPartIdx]; }
+    pixel* getCbAddr(uint32_t ctuAddr, uint32_t absPartIdx)   { return fpelPlane[1] + reconPic->m_cuOffsetC[ctuAddr] + reconPic->m_buOffsetC[absPartIdx]; }
+    pixel* getCrAddr(uint32_t ctuAddr, uint32_t absPartIdx)   { return fpelPlane[2] + reconPic->m_cuOffsetC[ctuAddr] + reconPic->m_buOffsetC[absPartIdx]; }
+
+    /* lowres motion compensation, you must provide a buffer and stride for QPEL averaged pixels
+     * in case QPEL is required.  Else it returns a pointer to the HPEL pixels */
+    /* In the QPEL branch `outstride` is only read (it is passed to
+     * pixelavg_pp together with buf); in the other branch it is overwritten
+     * with lumaStride and the returned pointer refers into a lowres plane. */
+    inline pixel *lowresMC(intptr_t blockOffset, const MV& qmv, pixel *buf, intptr_t& outstride)
+    {
+        // bit 0 of either component set => quarter-pel position
+        if ((qmv.x | qmv.y) & 1)
+        {
+            // plane A: selected by bit 1 of each component, at the integer
+            // position qmv >> 2
+            int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
+            pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
+            // plane B: same selection after bumping each odd component up
+            // by one; the two 8x8 blocks are then averaged into buf
+            int qmvx = qmv.x + (qmv.x & 1);
+            int qmvy = qmv.y + (qmv.y & 1);
+            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
+            // NOTE(review): meaning of the trailing 32 is defined by pixelavg_pp - confirm
+            primitives.pu[LUMA_8x8].pixelavg_pp(buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
+            return buf;
+        }
+        else
+        {
+            outstride = lumaStride;
+            int hpel = (qmv.y & 2) | ((qmv.x & 2) >> 1);
+            return lowresPlane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
+        }
+    }
+
+    /* Same position lookup as lowresMC, but instead of returning pixels it
+     * returns comp(fenc, FENC_STRIDE, <reference>, <reference stride>).
+     * The QPEL case averages into a local 8x8 buffer with stride 8. */
+    inline int lowresQPelCost(pixel *fenc, intptr_t blockOffset, const MV& qmv, pixelcmp_t comp)
+    {
+        if ((qmv.x | qmv.y) & 1)
+        {
+            ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
+            int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
+            pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
+            int qmvx = qmv.x + (qmv.x & 1);
+            int qmvy = qmv.y + (qmv.y & 1);
+            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
+            primitives.pu[LUMA_8x8].pixelavg_pp(subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
+            return comp(fenc, FENC_STRIDE, subpelbuf, 8);
+        }
+        else
+        {
+            int hpel = (qmv.y & 2) | ((qmv.x & 2) >> 1);
+            pixel *fref = lowresPlane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
+            return comp(fenc, FENC_STRIDE, fref, lumaStride);
+        }
+    }
+};
+
+/* lowres buffers, sizes and strides */
+/* Extends ReferencePlanes with the half-resolution picture and the
+ * per-frame analysis tables. The pointer members are allocated by create(),
+ * released by destroy() and reset per frame by init() (see lowres.cpp). */
+struct Lowres : public ReferencePlanes
+{
+    pixel *buffer[4];        // plane storage; create() makes buffer[0] the only allocation base
+
+    int    frameNum;         // Presentation frame number
+    int    sliceType;        // Slice type decided by lookahead
+    int    width;            // width of lowres frame in pixels
+    int    lines;            // height of lowres frame in pixel lines
+    int    leadingBframes;   // number of leading B frames for P or I
+
+    bool   bScenecut;        // Set to false if the frame cannot possibly be part of a real scenecut.
+    bool   bKeyframe;
+    bool   bLastMiniGopBFrame;
+
+    /* lookahead output data */
+    int64_t   costEst[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
+    int64_t   costEstAq[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
+    int32_t*  rowSatds[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
+    int       intraMbs[X265_BFRAME_MAX + 2];
+    int32_t*  intraCost;
+    uint8_t*  intraMode;
+    int64_t   satdCost;
+    // NOTE(review): lowresCostForRc is neither allocated by create() nor
+    // freed by destroy() in lowres.cpp - presumably it points into another
+    // array owned here; confirm with the code that assigns it.
+    uint16_t* lowresCostForRc;
+    uint16_t(*lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2]);
+    int32_t*  lowresMvCosts[2][X265_BFRAME_MAX + 1];
+    MV*       lowresMvs[2][X265_BFRAME_MAX + 1];
+    uint32_t  maxBlocksInRow;
+    uint32_t  maxBlocksInCol;
+
+    /* used for vbvLookahead */
+    int       plannedType[X265_LOOKAHEAD_MAX + 1];
+    int64_t   plannedSatd[X265_LOOKAHEAD_MAX + 1];
+    int       indB;
+    int       bframes;         // copy of create()'s _bframes; bounds the loops in create/destroy/init
+
+    /* rate control / adaptive quant data */
+    double*   qpAqOffset;      // AQ QP offset values for each 16x16 CU
+    double*   qpCuTreeOffset;  // cuTree QP offset values for each 16x16 CU
+    int*      invQscaleFactor; // qScale values for qp Aq Offsets
+    uint64_t  wp_ssd[3];       // This is different than SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame
+    uint64_t  wp_sum[3];
+
+    /* cutree intermediate data */
+    uint16_t* propagateCost;
+    double    weightedCostDelta[X265_BFRAME_MAX + 2];
+
+    // allocate all buffers; returns false when an allocation failed
+    bool create(PicYuv *origPic, int _bframes, bool bAqEnabled);
+    // free everything create() allocated
+    void destroy();
+    // reset per-frame state and regenerate the lowres planes from origPic
+    void init(PicYuv *origPic, int poc);
+};
+}
+
+#endif // ifndef X265_LOWRES_H
--- a/x265/source/common/md5.cpp
+++ b/x265/source/common/md5.cpp
@ -0,0 +1,268 @@
+/*****************************************************************************
+ * md5.cpp: Calculate MD5 for SEI
+ *****************************************************************************
+ * Copyright (C) 2011-2012 x265 project
+ *
+ * Authors: Min Chen <chenm003@163.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at chenm003@163.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "md5.h"
+
+namespace X265_NS {
+// private x265 namespace
+
+#ifndef ARCH_BIG_ENDIAN
+/* Without ARCH_BIG_ENDIAN the words are used as stored, so this is a no-op. */
+#define byteReverse(buf, len)   /* Nothing */
+#else
+/*
+ * Rewrite nSize consecutive 32-bit words in place so that the native value
+ * of each word equals the little-endian interpretation of its four bytes
+ * (buf[0] is the least significant byte).
+ *
+ * buf   - start of the byte buffer; must hold at least 4 * nSize bytes
+ * nSize - number of 32-bit words (not bytes) to convert
+ */
+static void byteReverse(uint8_t *buf, unsigned int nSize)
+{
+    for (unsigned int i = 0; i < nSize; i++)
+    {
+        uint32_t tmp = ((uint32_t)buf[3] << 8 | buf[2]) << 16 |
+            ((uint32_t)buf[1] << 8 | buf[0]);
+
+        /* memcpy instead of a uint32_t* store: no alignment or aliasing
+         * assumptions about the byte buffer */
+        memcpy(buf, &tmp, sizeof(tmp));
+        buf += 4;
+    }
+}
+
+#endif // ifndef ARCH_BIG_ENDIAN
+
+void MD5Transform(uint32_t *buf, uint32_t *in);
+
+/*
+ * Begin a new MD5 computation: load the four chaining words with their
+ * fixed starting values and clear the 64-bit bit counter.
+ */
+void MD5Init(MD5Context *ctx)
+{
+    static const uint32_t initialState[4] =
+    {
+        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
+    };
+
+    for (int i = 0; i < 4; i++)
+        ctx->buf[i] = initialState[i];
+
+    ctx->bits[0] = ctx->bits[1] = 0;
+}
+
+/*
+ * Update context to reflect the concatenation of another buffer full
+ * of bytes.
+ *
+ * ctx - running state; ctx->in holds the not-yet-hashed tail (< 64 bytes)
+ * buf - input bytes (only read)
+ * len - number of input bytes
+ *
+ * Complete 64-byte blocks are fed to MD5Transform; any remainder is left
+ * in ctx->in for the next call or for MD5Final.
+ */
+void MD5Update(MD5Context *ctx, uint8_t *buf, uint32_t len)
+{
+    uint32_t t;
+
+    /* Update bitcount */
+
+    /* bits[0] / bits[1] form a 64-bit count of message bits (low / high) */
+    t = ctx->bits[0];
+    if ((ctx->bits[0] = t + ((uint32_t)len << 3)) < t)
+        ctx->bits[1]++; /* Carry from low to high */
+    ctx->bits[1] += len >> 29; /* top 3 bits of len that (len << 3) dropped */
+
+    t = (t >> 3) & 0x3F;        /* Bytes already buffered in ctx->in */
+
+    /* Handle any leading odd-sized chunks */
+
+    if (t)
+    {
+        uint8_t *p = (uint8_t*)ctx->in + t;
+
+        t = 64 - t;             /* free space left in ctx->in */
+        if (len < t)
+        {
+            /* still not a full block: just append and wait for more */
+            memcpy(p, buf, len);
+            return;
+        }
+        memcpy(p, buf, t);
+        byteReverse(ctx->in, 16);
+        MD5Transform(ctx->buf, (uint32_t*)ctx->in);
+        buf += t;
+        len -= t;
+    }
+    /* Process data in 64-byte chunks */
+
+    while (len >= 64)
+    {
+        memcpy(ctx->in, buf, 64);
+        byteReverse(ctx->in, 16);
+        MD5Transform(ctx->buf, (uint32_t*)ctx->in);
+        buf += 64;
+        len -= 64;
+    }
+
+    /* Handle any remaining bytes of data. */
+
+    memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern
+ * 1 0* followed by the 64-bit count of bits processed.
+ * NOTE(review): this header used to say the count is stored "MSB-first",
+ * but the code below writes bits[0] (the low word, see MD5Update's carry
+ * handling) into word 14 and bits[1] into word 15, i.e. low word first.
+ *
+ * ctx    - state to finish; it is zero-filled before returning, so it must
+ *          be re-initialised with MD5Init before being used again
+ * digest - receives the 16-byte result
+ */
+void MD5Final(MD5Context *ctx, uint8_t *digest)
+{
+    uint32_t count;
+    uint8_t  *p;
+
+    /* Compute number of bytes mod 64 */
+    count = (ctx->bits[0] >> 3) & 0x3F;
+
+    /* Set the first char of padding to 0x80.  This is safe since there is
+       always at least one byte free */
+    p = ctx->in + count;
+    *p++ = 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = 64 - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if (count < 8)
+    {
+        /* fewer than 8 bytes left: the 8-byte length does not fit in
+           this block, so it goes into an extra one */
+        /* Two lots of padding:  Pad the first block to 64 bytes */
+        memset(p, 0, count);
+        byteReverse(ctx->in, 16);
+        MD5Transform(ctx->buf, (uint32_t*)ctx->in);
+
+        /* Now fill the next block with 56 bytes */
+        memset(ctx->in, 0, 56);
+    }
+    else
+    {
+        /* Pad block to 56 bytes */
+        memset(p, 0, count - 8);
+    }
+    /* only the first 14 words are converted; words 14 and 15 are written
+       below as native uint32_t values */
+    byteReverse(ctx->in, 14);
+
+    /* Append length in bits and transform */
+    // CHECK_ME: Always use 32-bits operator
+    uint32_t *table = (uint32_t*)&ctx->in;
+    table[14] = ctx->bits[0];
+    table[15] = ctx->bits[1];
+
+    MD5Transform(ctx->buf, (uint32_t*)ctx->in);
+    byteReverse((uint8_t*)ctx->buf, 4);
+    memcpy(digest, ctx->buf, 16);
+
+    memset(ctx, 0, sizeof(*ctx));        /* In case it's sensitive */
+}
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm. */
+#define MD5STEP(f, w, x, y, z, data, s) \
+    (w += f(x, y, z) + data,  w = w << s | w >> (32 - s),  w += x)
+
+/*
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to
+ * reflect the addition of 16 longwords of new data.  MD5Update blocks
+ * the data and converts bytes into longwords for this routine.
+ *
+ * buf - the four chaining words; updated in place
+ * in  - the 16 message words of one 64-byte block; only read
+ *
+ * The working variables are plain locals: the `register` storage class
+ * specifier is deprecated in C++11 and no longer valid in C++17.
+ */
+void MD5Transform(uint32_t *buf, uint32_t *in)
+{
+    uint32_t a, b, c, d;
+
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+
+    /* round 1 */
+    MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+    MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+    MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+    MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+    MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+    MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+    MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+    MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+    MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+    MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+    MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+    MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+    MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+    MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+    MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+    MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+    /* round 2 */
+    MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+    MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+    MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+    MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+    MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+    MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+    MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+    MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+    MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+    MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+    MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+    MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+    MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+    MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+    MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+    MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+    /* round 3 */
+    MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+    MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+    MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+    MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+    MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+    MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+    MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+    MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+    MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+    MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+    MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+    MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+    MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+    MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+    MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+    MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+    /* round 4 */
+    MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+    MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+    MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+    MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+    MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+    MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+    MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+    MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+    MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+    MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+    MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+    MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+    MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+    MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+    MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+    MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+    /* fold this block's result into the chaining state */
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+}
+}
--- a/x265/source/common/md5.h
+++ b/x265/source/common/md5.h
@ -0,0 +1,79 @@
+/*****************************************************************************
+ * md5.h: Calculate MD5
+ *****************************************************************************
+ * Copyright (C) 2011-2012 x265 project
+ *
+ * Authors: Min Chen <chenm003@163.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at chenm003@163.com.
+ *****************************************************************************/
+
+#ifndef X265_MD5_H
+#define X265_MD5_H
+
+#include "common.h"
+
+namespace X265_NS {
+//private x265 namespace
+
+/* Running state of one MD5 computation (see MD5Init / MD5Update / MD5Final). */
+typedef struct MD5Context
+{
+    uint32_t buf[4];        // the four chaining words; copied out as the 16-byte digest by MD5Final
+    uint32_t bits[2];       // count of message bits so far: [0] = low word, [1] = high word
+    unsigned char in[64];   // bytes of the current, not yet complete 64-byte block
+} MD5Context;
+
+void MD5Init(MD5Context *context);
+void MD5Update(MD5Context *context, unsigned char *buf, uint32_t len);
+void MD5Final(MD5Context *ctx, uint8_t *digest);
+
+/* Thin object wrapper around an MD5Context and the three free functions
+ * MD5Init / MD5Update / MD5Final. */
+class MD5
+{
+public:
+
+    /**
+     * initialize digest state
+     */
+    MD5()
+    {
+        MD5Init(&m_state);
+    }
+
+    /**
+     * compute digest over buf of length len.
+     * multiple calls may extend the digest over more data.
+     */
+    void update(unsigned char *buf, unsigned len)
+    {
+        MD5Update(&m_state, buf, len);
+    }
+
+    /**
+     * flush any outstanding MD5 data, write the digest into digest.
+     * MD5Final zero-fills the state afterwards and this class offers no
+     * way to re-initialise it, so an object is good for one digest only.
+     */
+    void finalize(unsigned char digest[16])
+    {
+        MD5Final(&m_state, digest);
+    }
+
+private:
+
+    MD5Context m_state;
+};
+}
+
+#endif // ifndef X265_MD5_H
--- a/x265/source/common/mv.h
+++ b/x265/source/common/mv.h
@ -0,0 +1,111 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_MV_H
+#define X265_MV_H
+
+#include "common.h"
+#include "primitives.h"
+
+namespace X265_NS {
+// private x265 namespace
+
+#if _MSC_VER
+#pragma warning(disable: 4201) // non-standard extension used (nameless struct/union)
+#endif
+
+/* Two-component motion vector. x and y are 16-bit values that share storage
+ * with a single 32-bit `word`, so both can be copied, compared or tested
+ * for zero in one operation. */
+struct MV
+{
+public:
+
+    union {
+        struct { int16_t x, y; };
+
+        int32_t word;
+    };
+
+    // Note: the default constructor leaves x, y and word uninitialised.
+    MV()                                       {}
+    // Build from an already packed x/y pair.
+    MV(int32_t w) : word(w)                    {}
+    MV(int16_t _x, int16_t _y) : x(_x), y(_y)  {}
+
+    // Assign a packed x/y pair.
+    MV& operator =(uint32_t w)                 { word = w; return *this; }
+
+    // Component-wise arithmetic; results are stored back into int16_t.
+    MV& operator +=(const MV& other)           { x += other.x; y += other.y; return *this; }
+
+    MV& operator -=(const MV& other)           { x -= other.x; y -= other.y; return *this; }
+
+    MV& operator >>=(int i)                    { x >>= i; y >>= i; return *this; }
+
+#if USING_FTRAPV
+    /* avoid signed left-shifts when -ftrapv is enabled */
+    MV& operator <<=(int i)                    { x *= (1 << i); y *= (1 << i); return *this; }
+    MV operator <<(int i) const                { return MV(x * (1 << i), y * (1 << i)); }
+#else
+    MV& operator <<=(int i)                    { x <<= i; y <<= i; return *this; }
+    MV operator <<(int i) const                { return MV(x << i, y << i); }
+#endif
+
+    MV operator >>(int i) const                { return MV(x >> i, y >> i); }
+
+    MV operator *(int16_t i) const             { return MV(x * i, y * i); }
+
+    MV operator -(const MV& other) const       { return MV(x - other.x, y - other.y); }
+
+    MV operator +(const MV& other) const       { return MV(x + other.x, y + other.y); }
+
+    // Equality and zero tests look at the packed word, i.e. both components at once.
+    bool operator ==(const MV& other) const    { return word == other.word; }
+
+    bool operator !=(const MV& other) const    { return word != other.word; }
+
+    bool operator !() const                    { return !word; }
+
+    // Scale down a QPEL mv to FPEL mv, rounding up by one HPEL offset
+    MV roundToFPel() const                     { return MV((x + 2) >> 2, (y + 2) >> 2); }
+
+    // Scale up an FPEL mv to QPEL by shifting up two bits
+    MV toQPel() const                          { return *this << 2; }
+
+    bool inline notZero() const                { return this->word != 0; }
+
+    // True when either component has one of its two lowest bits set
+    // (the mask covers bits 0-1 of each 16-bit half of word).
+    bool inline isSubpel() const               { return (this->word & 0x00030003) != 0; }
+
+    // Component-wise minimum / maximum of this vector and m.
+    MV mvmin(const MV& m) const                { return MV(x > m.x ? m.x : x, y > m.y ? m.y : y); }
+
+    MV mvmax(const MV& m) const                { return MV(x < m.x ? m.x : x, y < m.y ? m.y : y); }
+
+    // Clamp each component: first cap it at _max, then raise it to _min.
+    MV clipped(const MV& _min, const MV& _max) const
+    {
+        MV cl = mvmin(_max);
+
+        return cl.mvmax(_min);
+    }
+
+    // returns true if MV is within range (inclusive)
+    bool checkRange(const MV& _min, const MV& _max) const
+    {
+        return x >= _min.x && x <= _max.x && y >= _min.y && y <= _max.y;
+    }
+};
+}
+
+#endif // ifndef X265_MV_H
--- a/x265/source/common/param.cpp
+++ b/x265/source/common/param.cpp
--- a/x265/source/common/param.h
+++ b/x265/source/common/param.h
@ -0,0 +1,62 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
+ *          Praveen Kumar Tiwari <praveen@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_PARAM_H
+#define X265_PARAM_H
+
+namespace X265_NS {
+
+int   x265_check_params(x265_param *param);
+int   x265_set_globals(x265_param *param);
+void  x265_print_params(x265_param *param);
+void  x265_print_reconfigured_params(x265_param* param, x265_param* reconfiguredParam);
+void  x265_param_apply_fastfirstpass(x265_param *p);
+char* x265_param2string(x265_param *param);
+int   x265_atoi(const char *str, bool& bError);
+double x265_atof(const char *str, bool& bError);
+int   parseCpuName(const char *value, bool& bError);
+void  setParamAspectRatio(x265_param *p, int width, int height);
+void  getParamAspectRatio(x265_param *p, int& width, int& height);
+bool  parseLambdaFile(x265_param *param);
+
+/* this table is kept internal to avoid confusion, since log level indices start at -1 */
+static const char * const logLevelNames[] = { "none", "error", "warning", "info", "debug", "full", 0 };
+
+#if EXPORT_C_API
+#define PARAM_NS
+#else
+/* declare param functions within private namespace */
+void x265_param_free(x265_param *);
+x265_param* x265_param_alloc();
+void x265_param_default(x265_param *param);
+int x265_param_default_preset(x265_param *, const char *preset, const char *tune);
+int x265_param_apply_profile(x265_param *, const char *profile);
+int x265_param_parse(x265_param *p, const char *name, const char *value);
+#define PARAM_NS X265_NS
+#endif
+
+#define MAXPARAMSIZE 2000
+}
+
+#endif // ifndef X265_PARAM_H
--- a/x265/source/common/piclist.cpp
+++ b/x265/source/common/piclist.cpp
@ -0,0 +1,151 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Gopu Govindaswamy <gopu@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "piclist.h"
+#include "frame.h"
+
+using namespace X265_NS;
+
+/* Link curFrame in as the new first element of the list. The frame must not
+ * already be linked anywhere (both of its link pointers must be NULL). */
+void PicList::pushFront(Frame& curFrame)
+{
+    Frame* const node = &curFrame;
+
+    X265_CHECK(!curFrame.m_next && !curFrame.m_prev, "piclist: picture already in list\n"); // ensure frame is not in a list
+
+    node->m_prev = NULL;
+    node->m_next = m_start;
+
+    if (!m_count)
+        m_end = node;               // list was empty: the node is also the tail
+    else
+        m_start->m_prev = node;     // old head now follows the new node
+
+    m_start = node;
+    ++m_count;
+}
+
+/* Link curFrame in as the new last element of the list. The frame must not
+ * already be linked anywhere (both of its link pointers must be NULL). */
+void PicList::pushBack(Frame& curFrame)
+{
+    Frame* const node = &curFrame;
+
+    X265_CHECK(!curFrame.m_next && !curFrame.m_prev, "piclist: picture already in list\n"); // ensure frame is not in a list
+
+    node->m_next = NULL;
+    node->m_prev = m_end;
+
+    if (!m_count)
+        m_start = node;             // list was empty: the node is also the head
+    else
+        m_end->m_next = node;       // old tail now precedes the new node
+
+    m_end = node;
+    ++m_count;
+}
+
+/* Unlink and return the first frame of the list, or NULL when the list is
+ * empty. The returned frame has both link pointers reset to NULL. */
+Frame *PicList::popFront()
+{
+    Frame* const head = m_start;
+
+    if (!head)
+        return NULL;
+
+    if (--m_count)
+    {
+        // promote the second element to head
+        m_start = head->m_next;
+        m_start->m_prev = NULL;
+    }
+    else
+    {
+        // that was the only element
+        m_start = m_end = NULL;
+    }
+
+    head->m_prev = NULL;
+    head->m_next = NULL;
+    return head;
+}
+
+/* Walk the list from the front and return the first frame whose m_poc equals
+ * poc, or NULL when no such frame is linked. */
+Frame* PicList::getPOC(int poc)
+{
+    for (Frame* candidate = m_start; candidate; candidate = candidate->m_next)
+    {
+        if (candidate->m_poc == poc)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/* Unlink and return the last frame of the list, or NULL when the list is
+ * empty. The returned frame has both link pointers reset to NULL. */
+Frame *PicList::popBack()
+{
+    Frame* const tail = m_end;
+
+    if (!tail)
+        return NULL;
+
+    if (--m_count)
+    {
+        // the predecessor becomes the new tail
+        m_end = tail->m_prev;
+        m_end->m_next = NULL;
+    }
+    else
+    {
+        // that was the only element
+        m_start = m_end = NULL;
+    }
+
+    tail->m_prev = NULL;
+    tail->m_next = NULL;
+    return tail;
+}
+
+/* Unlink curFrame from this list and reset its link pointers. The frame is
+ * expected to be a member of the list; membership is only verified in
+ * _DEBUG builds. */
+void PicList::remove(Frame& curFrame)
+{
+#if _DEBUG
+    Frame *scan;
+    for (scan = m_start; scan && scan != &curFrame; scan = scan->m_next)
+    {
+    }
+
+    X265_CHECK(scan == &curFrame, "piclist: pic being removed was not in list\n"); // verify pic is in this list
+#endif
+
+    Frame* const self = &curFrame;
+
+    if (--m_count == 0)
+    {
+        // last element gone, list is empty again
+        m_start = m_end = NULL;
+    }
+    else
+    {
+        // move the list ends off the departing frame first ...
+        if (m_start == self)
+            m_start = self->m_next;
+        if (m_end == self)
+            m_end = self->m_prev;
+
+        // ... then let its neighbours point past it
+        if (self->m_next)
+            self->m_next->m_prev = self->m_prev;
+        if (self->m_prev)
+            self->m_prev->m_next = self->m_next;
+    }
+
+    self->m_next = self->m_prev = NULL;
+}
--- a/x265/source/common/piclist.h
+++ b/x265/source/common/piclist.h
@ -0,0 +1,80 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Gopu Govindaswamy <gopu@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_PICLIST_H
+#define X265_PICLIST_H
+
+#include "common.h"
+
+namespace X265_NS {
+
+class Frame;
+
+/* Doubly-linked list of Frame objects. The links are stored in the frames
+ * themselves (Frame::m_next / Frame::m_prev); the list only keeps the two end
+ * pointers and an element count. */
+class PicList
+{
+protected:
+
+    Frame*   m_start;   // first frame, NULL while the list is empty
+    Frame*   m_end;     // last frame, NULL while the list is empty
+    int      m_count;   // number of frames currently linked
+
+public:
+
+    /** Construct an empty list */
+    PicList()
+        : m_start(NULL)
+        , m_end(NULL)
+        , m_count(0)
+    {
+    }
+
+    /** Append a picture after the current last one */
+    void pushBack(Frame& pic);
+
+    /** Insert a picture before the current first one */
+    void pushFront(Frame& pic);
+
+    /** Detach and return the last picture, NULL if the list is empty */
+    Frame* popBack();
+
+    /** Detach and return the first picture, NULL if the list is empty */
+    Frame* popFront();
+
+    /** Return the first picture with the given POC, NULL if none matches */
+    Frame* getPOC(int poc);
+
+    /** Detach the given picture from the list */
+    void remove(Frame& pic);
+
+    /** First picture without detaching it (NULL if empty) */
+    Frame* first()
+    {
+        return m_start;
+    }
+
+    /** Last picture without detaching it (NULL if empty) */
+    Frame* last()
+    {
+        return m_end;
+    }
+
+    /** Number of pictures in the list */
+    int size()
+    {
+        return m_count;
+    }
+
+    /** True when no picture is linked */
+    bool empty() const
+    {
+        return m_count == 0;
+    }
+
+    /** True when at least one picture is linked */
+    operator bool() const
+    {
+        return m_count != 0;
+    }
+};
+}
+
+#endif // ifndef X265_PICLIST_H
--- a/x265/source/common/picyuv.cpp
+++ b/x265/source/common/picyuv.cpp
@ -0,0 +1,427 @@
+/*****************************************************************************
+ * Copyright (C) 2015 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "picyuv.h"
+#include "slice.h"
+#include "primitives.h"
+
+using namespace X265_NS;
+
+/* Start with no buffers attached and cleared luma statistics; the geometry
+ * members are filled in by create(). */
+PicYuv::PicYuv()
+{
+    for (int plane = 0; plane < 3; plane++)
+    {
+        m_picBuf[plane] = NULL;
+        m_picOrg[plane] = NULL;
+    }
+
+    m_cuOffsetY = m_cuOffsetC = NULL;
+    m_buOffsetY = m_buOffsetC = NULL;
+
+    m_avgLumaLevel = 0;
+    m_maxLumaLevel = 0;
+}
+
+/* Record the picture geometry and allocate the plane buffers, margins
+ * included. Chroma planes are not allocated for X265_CSP_I400. Returns false
+ * as soon as an allocation fails; buffers obtained before the failure are
+ * left attached to the object. */
+bool PicYuv::create(uint32_t picWidth, uint32_t picHeight, uint32_t picCsp)
+{
+    m_picCsp       = picCsp;
+    m_picWidth     = picWidth;
+    m_picHeight    = picHeight;
+    m_hChromaShift = CHROMA_H_SHIFT(picCsp);
+    m_vChromaShift = CHROMA_V_SHIFT(picCsp);
+
+    /* picture size in whole CTUs, rounded up */
+    const uint32_t ctuCols = (m_picWidth + g_maxCUSize - 1) / g_maxCUSize;
+    const uint32_t ctuRows = (m_picHeight + g_maxCUSize - 1) / g_maxCUSize;
+
+    m_lumaMarginX = g_maxCUSize + 32; // search margin and 8-tap filter half-length, padded for 32-byte alignment
+    m_lumaMarginY = g_maxCUSize + 16; // margin for 8-tap filter and infinite padding
+    m_chromaMarginX = m_lumaMarginX;  // keep 16-byte alignment for chroma CTUs
+    m_chromaMarginY = m_lumaMarginY >> m_vChromaShift;
+
+    /* a row holds the CTU-aligned width plus a margin on either side */
+    m_stride  = (ctuCols * g_maxCUSize) + (m_lumaMarginX * 2);
+    m_strideC = ((ctuCols * g_maxCUSize) >> m_hChromaShift) + (m_chromaMarginX * 2);
+
+    /* all sizes are computed up front: CHECKED_MALLOC leaves through
+     * 'goto fail', which must not jump over an initialised declaration */
+    const int maxHeight = ctuRows * g_maxCUSize;
+    const uint32_t lumaRows = maxHeight + (m_lumaMarginY * 2);
+    const uint32_t chromaRows = (maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2);
+
+    CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * lumaRows);
+    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY * m_stride + m_lumaMarginX;
+
+    if (m_picCsp != X265_CSP_I400)
+    {
+        CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * chromaRows);
+        CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * chromaRows);
+
+        /* plane origins sit behind the top and left margins */
+        m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC + m_chromaMarginX;
+        m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC + m_chromaMarginX;
+    }
+
+    return true;
+
+fail:
+    return false;
+}
+
+/* Build the CTU and partition offset tables for this picture's strides.
+ * The first picture allocated by the encoder is asked to generate them; once
+ * generated they are handed to every later PicYuv of the same encoder.
+ * Chroma tables are skipped for X265_CSP_I400. Returns false when an
+ * allocation fails. */
+bool PicYuv::createOffsets(const SPS& sps)
+{
+    const bool hasChroma = m_picCsp != X265_CSP_I400;
+    const uint32_t numPartitions = 1 << (g_unitSizeDepth * 2);
+    const uint32_t ctuHeightC = g_maxCUSize >> m_vChromaShift;
+    const uint32_t ctuWidthC = g_maxCUSize >> m_hChromaShift;
+
+    /* one entry per CTU: offset of its top-left sample from the plane origin */
+    CHECKED_MALLOC(m_cuOffsetY, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
+    if (hasChroma)
+    {
+        CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
+    }
+
+    for (uint32_t ctuY = 0; ctuY < sps.numCuInHeight; ctuY++)
+    {
+        for (uint32_t ctuX = 0; ctuX < sps.numCuInWidth; ctuX++)
+        {
+            m_cuOffsetY[ctuY * sps.numCuInWidth + ctuX] = m_stride * ctuY * g_maxCUSize + ctuX * g_maxCUSize;
+            if (hasChroma)
+            {
+                m_cuOffsetC[ctuY * sps.numCuInWidth + ctuX] = m_strideC * ctuY * ctuHeightC + ctuX * ctuWidthC;
+            }
+        }
+    }
+
+    /* one entry per z-scan partition: offset of the partition inside its CTU */
+    CHECKED_MALLOC(m_buOffsetY, intptr_t, (size_t)numPartitions);
+    if (hasChroma)
+    {
+        CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
+    }
+
+    for (uint32_t part = 0; part < numPartitions; ++part)
+    {
+        const intptr_t pelX = g_zscanToPelX[part];
+        const intptr_t pelY = g_zscanToPelY[part];
+
+        m_buOffsetY[part] = m_stride * pelY + pelX;
+        if (hasChroma)
+        {
+            m_buOffsetC[part] = m_strideC * (pelY >> m_vChromaShift) + (pelX >> m_hChromaShift);
+        }
+    }
+
+    return true;
+
+fail:
+    return false;
+}
+
+/* Release the plane buffers obtained by create().
+ *
+ * The buffer pointers and the plane-origin pointers derived from them are
+ * reset to NULL afterwards, so that calling destroy() twice does not free the
+ * same block again and no accessor hands out a pointer into freed memory.
+ * Planes that were never allocated (chroma of an X265_CSP_I400 picture) are
+ * still NULL from the constructor and are passed to X265_FREE unchanged, as
+ * before.
+ *
+ * The CTU/partition offset tables are not touched here; they are owned by the
+ * top-level encoder (see the member comments in picyuv.h). */
+void PicYuv::destroy()
+{
+    for (int plane = 0; plane < 3; plane++)
+    {
+        X265_FREE(m_picBuf[plane]);
+        m_picBuf[plane] = NULL;
+        m_picOrg[plane] = NULL;   // pointed into the buffer that was just freed
+    }
+}
+
+/* Copy pixels from an x265_picture into internal PicYuv instance.
+ * Shift pixels as necessary, mask off bits above X265_DEPTH for safety.
+ *
+ * pic    source picture; planes[] / stride[] are read for luma and, unless the
+ *        picture is X265_CSP_I400, for both chroma planes
+ * param  supplies minLuma / maxLuma, which are passed to planeClipAndMax
+ * padx   columns of m_picWidth that are padding rather than input samples
+ * pady   rows of m_picHeight that are padding rather than input samples
+ *
+ * After the copy, the right and bottom edges are replicated into the padding
+ * area and m_maxLumaLevel / m_avgLumaLevel are updated. */
+void PicYuv::copyFromPicture(const x265_picture& pic, const x265_param& param, int padx, int pady)
+{
+    /* m_picWidth is the width that is being encoded, padx indicates how many
+     * of those pixels are padding to reach multiple of MinCU(4) size.
+     *
+     * Internally, we need to extend rows out to a multiple of 16 for lowres
+     * downscale and other operations. But those padding pixels are never
+     * encoded.
+     *
+     * The same applies to m_picHeight and pady */
+
+    /* width and height - without padsize (input picture raw width and height) */
+    int width = m_picWidth - padx;
+    int height = m_picHeight - pady;
+
+    /* internal pad to multiple of 16x16 blocks */
+    uint8_t rem = width & 15;
+
+    /* when the raw size is already a multiple of 16 the caller's pad amount
+     * is kept, otherwise it is replaced by the distance to the next multiple */
+    padx = rem ? 16 - rem : padx;
+    rem = height & 15;
+    pady = rem ? 16 - rem : pady;
+
+    /* add one more row and col of pad for downscale interpolation, fixes
+     * warnings from valgrind about using uninitialized pixels */
+    padx++;
+    pady++;
+
+    X265_CHECK(pic.bitDepth >= 8, "pic.bitDepth check failure");
+
+    /* 8-bit input: planes are read as uint8_t samples */
+    if (pic.bitDepth == 8)
+    {
+#if (X265_DEPTH > 8)
+        {
+            /* internal samples are wider than the input: copy with a left
+             * shift of (X265_DEPTH - 8) bits */
+            pixel *yPixel = m_picOrg[0];
+            pixel *uPixel = m_picOrg[1];
+            pixel *vPixel = m_picOrg[2];
+
+            uint8_t *yChar = (uint8_t*)pic.planes[0];
+            uint8_t *uChar = (uint8_t*)pic.planes[1];
+            uint8_t *vChar = (uint8_t*)pic.planes[2];
+            int shift = (X265_DEPTH - 8);
+
+            primitives.planecopy_cp(yChar, pic.stride[0] / sizeof(*yChar), yPixel, m_stride, width, height, shift);
+            if (m_picCsp != X265_CSP_I400) {
+                primitives.planecopy_cp(uChar, pic.stride[1] / sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
+                primitives.planecopy_cp(vChar, pic.stride[2] / sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
+            }
+        }
+#else /* Case for (X265_DEPTH == 8) */
+        // TODO: Do we need this path? may merge into above in future
+        {
+            /* same sample size on both sides: plain row-by-row memcpy */
+            pixel *yPixel = m_picOrg[0];
+            pixel *uPixel = m_picOrg[1];
+            pixel *vPixel = m_picOrg[2];
+
+            uint8_t *yChar = (uint8_t*)pic.planes[0];
+            uint8_t *uChar = (uint8_t*)pic.planes[1];
+            uint8_t *vChar = (uint8_t*)pic.planes[2];
+
+            for (int r = 0; r < height; r++)
+            {
+                memcpy(yPixel, yChar, width * sizeof(pixel));
+
+                yPixel += m_stride;
+                yChar += pic.stride[0] / sizeof(*yChar);
+            }
+
+            if (m_picCsp != X265_CSP_I400) {
+                for (int r = 0; r < height >> m_vChromaShift; r++)
+                    {
+                        memcpy(uPixel, uChar, (width >> m_hChromaShift) * sizeof(pixel));
+                        memcpy(vPixel, vChar, (width >> m_hChromaShift) * sizeof(pixel));
+                        
+                        uPixel += m_strideC;
+                        vPixel += m_strideC;
+                        uChar += pic.stride[1] / sizeof(*uChar);
+                        vChar += pic.stride[2] / sizeof(*vChar);
+                    }
+            }
+        }
+#endif /* (X265_DEPTH > 8) */
+    }
+    else /* pic.bitDepth > 8 */
+    {
+        /* deeper input: planes are read as uint16_t samples and shifted by the
+         * absolute difference between the input depth and X265_DEPTH */
+        /* defensive programming, mask off bits that are supposed to be zero */
+        uint16_t mask = (1 << X265_DEPTH) - 1;
+        int shift = abs(pic.bitDepth - X265_DEPTH);
+        pixel *yPixel = m_picOrg[0];
+        pixel *uPixel = m_picOrg[1];
+        pixel *vPixel = m_picOrg[2];
+
+        uint16_t *yShort = (uint16_t*)pic.planes[0];
+        uint16_t *uShort = (uint16_t*)pic.planes[1];
+        uint16_t *vShort = (uint16_t*)pic.planes[2];
+
+        if (pic.bitDepth > X265_DEPTH)
+        {
+            /* shift right and mask pixels to final size */
+            primitives.planecopy_sp(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
+            if (m_picCsp != X265_CSP_I400) {
+                primitives.planecopy_sp(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+                primitives.planecopy_sp(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+            }
+        }
+        else /* Case for (pic.bitDepth <= X265_DEPTH) */
+        {
+            /* shift left and mask pixels to final size */
+            primitives.planecopy_sp_shl(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
+            if (m_picCsp != X265_CSP_I400) {
+                primitives.planecopy_sp_shl(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+                primitives.planecopy_sp_shl(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+            }
+        }
+    }
+
+    /* gather luma statistics, then extend the right edge into the padding */
+
+    pixel *Y = m_picOrg[0];
+    pixel *U = m_picOrg[1];
+    pixel *V = m_picOrg[2];
+
+    /* NOTE(review): planeClipAndMax is called on the unpadded width x height
+     * area, while the average below divides by m_picWidth * m_picHeight, which
+     * include the caller's padding -- presumably intentional, confirm. */
+    uint64_t sumLuma;
+    m_maxLumaLevel = primitives.planeClipAndMax(Y, m_stride, width, height, &sumLuma, (pixel)param.minLuma, (pixel)param.maxLuma);
+    m_avgLumaLevel = (double)(sumLuma) / (m_picHeight * m_picWidth);
+
+    /* replicate the last input column of every luma row padx times */
+    for (int r = 0; r < height; r++)
+    {
+        for (int x = 0; x < padx; x++)
+            Y[width + x] = Y[width - 1];
+
+        Y += m_stride;
+    }
+
+    /* same for chroma, with sizes scaled by the chroma shifts */
+    if (m_picCsp != X265_CSP_I400) {
+        for (int r = 0; r < height >> m_vChromaShift; r++)
+            {
+                for (int x = 0; x < padx >> m_hChromaShift; x++)
+                    {
+                        U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
+                        V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
+                    }
+                
+                U += m_strideC;
+                V += m_strideC;
+            }
+    }
+
+    /* extend the bottom if height was not multiple of the minimum CU size */
+    /* Y now points at the last input row; it is copied, right padding
+     * included, into each of the pady rows below it */
+    Y = m_picOrg[0] + (height - 1) * m_stride;
+
+    for (int i = 1; i <= pady; i++)
+        memcpy(Y + i * m_stride, Y, (width + padx) * sizeof(pixel));
+
+    if (m_picCsp != X265_CSP_I400) {
+        U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
+        V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
+        for (int j = 1; j <= pady >> m_vChromaShift; j++)
+            {
+                memcpy(U + j * m_strideC, U, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
+                memcpy(V + j * m_strideC, V, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
+            }
+    }
+}
+
+namespace X265_NS {
+
+/* Pack n samples into OUTPUT_BITDEPTH_DIV8 bytes each, least significant
+ * byte first, and feed the packed bytes to the MD5 context. The staging
+ * buffer is 64 bytes, so n must not exceed 64 / OUTPUT_BITDEPTH_DIV8. */
+template<uint32_t OUTPUT_BITDEPTH_DIV8>
+static void md5_block(MD5Context& md5, const pixel* plane, uint32_t n)
+{
+    /* 64-byte staging area, one row of bytes per sample */
+    uint8_t packed[64 / OUTPUT_BITDEPTH_DIV8][OUTPUT_BITDEPTH_DIV8];
+
+    for (uint32_t s = 0; s < n; s++)
+    {
+        const pixel sample = plane[s];
+        uint8_t* const dst = packed[s];
+
+        /* byte b of the output holds bits [8b, 8b + 7] of the sample */
+        for (uint32_t b = 0, shift = 0; b < OUTPUT_BITDEPTH_DIV8; b++, shift += 8)
+            dst[b] = (uint8_t)(sample >> shift);
+    }
+
+    MD5Update(&md5, (uint8_t*)packed, n * OUTPUT_BITDEPTH_DIV8);
+}
+
+/* Update md5 with all samples in plane in raster order, each sample
+ * is packed into OUTPUT_BITDEPTH_DIV8 bytes */
+template<uint32_t OUTPUT_BITDEPTH_DIV8>
+static void md5_plane(MD5Context& md5, const pixel* plane, uint32_t width, uint32_t height, intptr_t stride)
+{
+    /* samples handed to md5_block per call; must fit its staging buffer */
+    const uint32_t chunk = 32;
+    const uint32_t tail = width % chunk;   // samples left over at the end of a row
+    const uint32_t body = width - tail;    // part of a row covered by full chunks
+
+    for (uint32_t y = 0; y < height; y++)
+    {
+        uint32_t x = 0;
+
+        /* full chunks first */
+        while (x < body)
+        {
+            md5_block<OUTPUT_BITDEPTH_DIV8>(md5, &plane[y * stride + x], chunk);
+            x += chunk;
+        }
+
+        /* then whatever remains of the row (possibly zero samples) */
+        md5_block<OUTPUT_BITDEPTH_DIV8>(md5, &plane[y * stride + x], tail);
+    }
+}
+
+/* Fold every sample of a width x height plane into the running 16-bit CRC
+ * held in crcVal (feedback constant 0x1021). Each sample contributes its low
+ * byte, most significant bit first, followed by its high byte when the build
+ * uses X265_DEPTH > 8. */
+void updateCRC(const pixel* plane, uint32_t& crcVal, uint32_t height, uint32_t width, intptr_t stride)
+{
+    for (uint32_t y = 0; y < height; y++)
+    {
+        for (uint32_t x = 0; x < width; x++)
+        {
+            const pixel sample = plane[y * stride + x];
+
+            // first picture data byte: bits 7..0 of the sample
+            for (int bit = 7; bit >= 0; bit--)
+            {
+                const uint32_t topBit = (crcVal >> 15) & 1;
+                const uint32_t inBit = (sample >> bit) & 1;
+                crcVal = (((crcVal << 1) + inBit) & 0xffff) ^ (topBit * 0x1021);
+            }
+
+#if _MSC_VER
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif
+            // second picture data byte (bits 15..8), only for depths above 8 bits
+            if (X265_DEPTH > 8)
+            {
+                for (int bit = 15; bit >= 8; bit--)
+                {
+                    const uint32_t topBit = (crcVal >> 15) & 1;
+                    const uint32_t inBit = (sample >> bit) & 1;
+                    crcVal = (((crcVal << 1) + inBit) & 0xffff) ^ (topBit * 0x1021);
+                }
+            }
+        }
+    }
+}
+
+/* Finish a CRC started with updateCRC: clock 16 zero bits through the
+ * register, then store the 16-bit result big-endian in digest[0..1].
+ * crcVal is left holding the final register value; the remaining digest
+ * bytes are not written. */
+void crcFinish(uint32_t& crcVal, uint8_t digest[16])
+{
+    for (int remaining = 16; remaining > 0; remaining--)
+    {
+        const bool feedback = ((crcVal >> 15) & 1) != 0;
+
+        crcVal = (crcVal << 1) & 0xffff;
+        if (feedback)
+            crcVal ^= 0x1021;
+    }
+
+    digest[0] = (uint8_t)((crcVal >> 8) & 0xff);
+    digest[1] = (uint8_t)(crcVal & 0xff);
+}
+
+/* Add the samples of 'height' rows, starting at row index row * cuHeight, to
+ * the running 32-bit checksum. Each sample byte is XORed with a mask derived
+ * from its x/y position before being added; the high byte is only added when
+ * the build uses X265_DEPTH > 8. */
+void updateChecksum(const pixel* plane, uint32_t& checksumVal, uint32_t height, uint32_t width, intptr_t stride, int row, uint32_t cuHeight)
+{
+    const uint32_t firstRow = row * cuHeight;
+    const uint32_t endRow = firstRow + height;
+
+    for (uint32_t y = firstRow; y < endRow; y++)
+    {
+        for (uint32_t x = 0; x < width; x++)
+        {
+            const pixel sample = plane[y * stride + x];
+            const uint8_t posMask = (uint8_t)((x & 0xff) ^ (y & 0xff) ^ (x >> 8) ^ (y >> 8));
+
+            checksumVal = (checksumVal + ((sample & 0xff) ^ posMask)) & 0xffffffff;
+
+            /* the high byte is extracted with two shifts (7 then 1) exactly as
+             * in the original code, rather than a single shift by 8 */
+            if (X265_DEPTH > 8)
+                checksumVal = (checksumVal + ((sample >> 7 >> 1) ^ posMask)) & 0xffffffff;
+        }
+    }
+}
+
+/* Store the 32-bit checksum big-endian in digest[0..3]; the remaining digest
+ * bytes are not written. */
+void checksumFinish(uint32_t checksum, uint8_t digest[16])
+{
+    for (int i = 0; i < 4; i++)
+    {
+        const int shift = 24 - 8 * i;   // most significant byte goes first
+        digest[i] = (uint8_t)((checksum >> shift) & 0xff);
+    }
+}
+
+/* Feed one plane to the MD5 context. Samples are packed into one byte each
+ * when the build uses X265_DEPTH <= 8 and into two bytes each otherwise. */
+void updateMD5Plane(MD5Context& md5, const pixel* plane, uint32_t width, uint32_t height, intptr_t stride)
+{
+    /* the packing width follows the compile-time bit depth of this build */
+    if (X265_DEPTH <= 8)
+        md5_plane<1>(md5, plane, width, height, stride);
+    else
+        md5_plane<2>(md5, plane, width, height, stride);
+}
+}
--- a/x265/source/common/picyuv.h
+++ b/x265/source/common/picyuv.h
@ -0,0 +1,106 @@
+/*****************************************************************************
+ * Copyright (C) 2015 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_PICYUV_H
+#define X265_PICYUV_H
+
+#include "common.h"
+#include "md5.h"
+#include "x265.h"
+
+namespace X265_NS {
+// private namespace
+
+class ShortYuv;
+struct SPS;
+
+/* Planar picture storage. Each plane lives in its own buffer that includes a
+ * margin around the picture; the address helpers below locate a CTU, or a
+ * partition inside a CTU, through precomputed offset tables. */
+class PicYuv
+{
+public:
+
+    pixel*   m_picBuf[3];  // allocation base of each plane, margins included
+    pixel*   m_picOrg[3];  // first picture sample of each plane
+
+    uint32_t m_picWidth;
+    uint32_t m_picHeight;
+    intptr_t m_stride;     // luma row pitch, in samples
+    intptr_t m_strideC;    // chroma row pitch, in samples
+
+    uint32_t m_picCsp;
+    uint32_t m_hChromaShift;
+    uint32_t m_vChromaShift;
+
+    /* offset tables; these four buffers are owned by the top-level encoder */
+    intptr_t* m_cuOffsetY;  // per CTU, luma
+    intptr_t* m_cuOffsetC;  // per CTU, chroma
+    intptr_t* m_buOffsetY;  // per partition inside a CTU, luma
+    intptr_t* m_buOffsetC;  // per partition inside a CTU, chroma
+
+    uint32_t m_lumaMarginX;
+    uint32_t m_lumaMarginY;
+    uint32_t m_chromaMarginX;
+    uint32_t m_chromaMarginY;
+
+    /* luma statistics written by copyFromPicture() */
+    uint16_t m_maxLumaLevel;
+    double   m_avgLumaLevel;
+
+    PicYuv();
+
+    bool  create(uint32_t picWidth, uint32_t picHeight, uint32_t csp);
+    bool  createOffsets(const SPS& sps);
+    void  destroy();
+
+    void  copyFromPicture(const x265_picture&, const x265_param& param, int padx, int pady);
+
+    /* chroma offset of a partition relative to the plane origin */
+    intptr_t getChromaAddrOffset(uint32_t ctuAddr, uint32_t absPartIdx) const
+    {
+        return m_cuOffsetC[ctuAddr] + m_buOffsetC[absPartIdx];
+    }
+
+    /* get pointer to CTU start address */
+    pixel* getPlaneAddr(uint32_t plane, uint32_t ctuAddr)
+    {
+        const intptr_t* ctuOffsets = plane ? m_cuOffsetC : m_cuOffsetY;
+        return m_picOrg[plane] + ctuOffsets[ctuAddr];
+    }
+    pixel* getChromaAddr(uint32_t chromaId, uint32_t ctuAddr)
+    {
+        return m_picOrg[chromaId] + m_cuOffsetC[ctuAddr];
+    }
+    pixel* getLumaAddr(uint32_t ctuAddr) { return getPlaneAddr(0, ctuAddr); }
+    pixel* getCbAddr(uint32_t ctuAddr)   { return getChromaAddr(1, ctuAddr); }
+    pixel* getCrAddr(uint32_t ctuAddr)   { return getChromaAddr(2, ctuAddr); }
+
+    const pixel* getPlaneAddr(uint32_t plane, uint32_t ctuAddr) const
+    {
+        const intptr_t* ctuOffsets = plane ? m_cuOffsetC : m_cuOffsetY;
+        return m_picOrg[plane] + ctuOffsets[ctuAddr];
+    }
+    const pixel* getChromaAddr(uint32_t chromaId, uint32_t ctuAddr) const
+    {
+        return m_picOrg[chromaId] + m_cuOffsetC[ctuAddr];
+    }
+    const pixel* getLumaAddr(uint32_t ctuAddr) const { return getPlaneAddr(0, ctuAddr); }
+    const pixel* getCbAddr(uint32_t ctuAddr) const   { return getChromaAddr(1, ctuAddr); }
+    const pixel* getCrAddr(uint32_t ctuAddr) const   { return getChromaAddr(2, ctuAddr); }
+
+    /* get pointer to CU start address: CTU start plus the partition offset */
+    pixel* getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx)
+    {
+        return getLumaAddr(ctuAddr) + m_buOffsetY[absPartIdx];
+    }
+    pixel* getChromaAddr(uint32_t chromaId, uint32_t ctuAddr, uint32_t absPartIdx)
+    {
+        return getChromaAddr(chromaId, ctuAddr) + m_buOffsetC[absPartIdx];
+    }
+    pixel* getCbAddr(uint32_t ctuAddr, uint32_t absPartIdx) { return getChromaAddr(1, ctuAddr, absPartIdx); }
+    pixel* getCrAddr(uint32_t ctuAddr, uint32_t absPartIdx) { return getChromaAddr(2, ctuAddr, absPartIdx); }
+
+    const pixel* getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx) const
+    {
+        return getLumaAddr(ctuAddr) + m_buOffsetY[absPartIdx];
+    }
+    const pixel* getChromaAddr(uint32_t chromaId, uint32_t ctuAddr, uint32_t absPartIdx) const
+    {
+        return getChromaAddr(chromaId, ctuAddr) + m_buOffsetC[absPartIdx];
+    }
+    const pixel* getCbAddr(uint32_t ctuAddr, uint32_t absPartIdx) const { return getChromaAddr(1, ctuAddr, absPartIdx); }
+    const pixel* getCrAddr(uint32_t ctuAddr, uint32_t absPartIdx) const { return getChromaAddr(2, ctuAddr, absPartIdx); }
+};
+
+/* Picture hash helpers, defined in picyuv.cpp. */
+
+/* Adds 'height' rows of samples, starting at row index row * cuHeight, to the
+ * running checksum in checksumVal. */
+void updateChecksum(const pixel* plane, uint32_t& checksumVal, uint32_t height, uint32_t width, intptr_t stride, int row, uint32_t cuHeight);
+/* Folds a width x height plane into the running 16-bit CRC in crcVal. */
+void updateCRC(const pixel* plane, uint32_t& crcVal, uint32_t height, uint32_t width, intptr_t stride);
+/* Finalises the CRC in place and writes it big-endian to digest[0..1]. */
+void crcFinish(uint32_t & crc, uint8_t digest[16]);
+/* Writes the checksum big-endian to digest[0..3]. */
+void checksumFinish(uint32_t checksum, uint8_t digest[16]);
+/* Feeds a width x height plane to the MD5 context, one or two bytes per
+ * sample depending on X265_DEPTH. */
+void updateMD5Plane(MD5Context& md5, const pixel* plane, uint32_t width, uint32_t height, intptr_t stride);
+}
+
+#endif // ifndef X265_PICYUV_H
--- a/x265/source/common/pixel.cpp
+++ b/x265/source/common/pixel.cpp
--- a/Show more
+++ b/Show more
 @ -1 +1 @@
 .9.5
 .9.6