00001
00002
00003
00004
00005 #include "pch.h"
00006 #include "config.h"
00007
00008 #if CRYPTOPP_MSC_VERSION
00009 # pragma warning(disable: 4702 4731)
00010 #endif
00011
00012 #ifndef CRYPTOPP_GENERATE_X64_MASM
00013
00014 #include "sosemanuk.h"
00015 #include "serpentp.h"
00016 #include "secblock.h"
00017 #include "misc.h"
00018 #include "cpu.h"
00019
00020 NAMESPACE_BEGIN(CryptoPP)
00021
00022 void SosemanukPolicy::CipherSetKey(const NameValuePairs ¶ms, const byte *userKey, size_t keylen)
00023 {
00024 CRYPTOPP_UNUSED(params);
00025 Serpent_KeySchedule(m_key, 24, userKey, keylen);
00026 }
00027
// Rebuilds the twelve words of m_state from an initialization vector.
//
//   keystreamBuffer  not used by this routine
//   iv               source of four word32 values, read through
//                    BlockGetAndPut<word32, LittleEndian>
//   length           only inspected by the assert below (expected to be 16)
//
// The four IV words are pushed through three passes of eight KX / Sn / LT macro
// steps each, reading key material through the pointer k (which starts at m_key
// and advances by 32 words per pass). Snapshots of the working words taken
// part-way through the passes become m_state[4..11]; the words left after the
// final KX become m_state[0..3].
// NOTE(review): beforeS0/afterSn, KX, S0..S7 and LT are not defined in this file
// (presumably they come from serpentp.h) and refer to the locals a, b, c, d, e
// and k by name, so those identifiers must not be renamed.
00028 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
00029 {
00030 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(iv), CRYPTOPP_UNUSED(length);
00031 assert(length==16);
00032
// Working words for the macro steps; e is not loaded from the IV.
00033 word32 a, b, c, d, e;
00034
00035 typedef BlockGetAndPut<word32, LittleEndian> Block;
00036 Block::Get(iv)(a)(b)(c)(d);
00037
00038 const word32 *k = m_key;
// Pass counter: 1, 2, 3.
00039 unsigned int i=1;
00040
00041 do
00042 {
00043 beforeS0(KX); beforeS0(S0); afterS0(LT);
00044 afterS0(KX); afterS0(S1); afterS1(LT);
// Third pass, after its S0 and S1 steps (16 + 2 = 18 steps so far).
00045 if (i == 3)
00046 {
00047 m_state[4] = b;
00048 m_state[5] = e;
00049 m_state[10] = c;
00050 m_state[11] = a;
00051 }
00052 afterS1(KX); afterS1(S2); afterS2(LT);
00053 afterS2(KX); afterS2(S3); afterS3(LT);
// Second pass, after its S0..S3 steps (8 + 4 = 12 steps so far).
00054 if (i == 2)
00055 {
00056 m_state[6] = c;
00057 m_state[7] = d;
00058 m_state[8] = b;
00059 m_state[9] = e;
00060 }
00061 afterS3(KX); afterS3(S4); afterS4(LT);
00062 afterS4(KX); afterS4(S5); afterS5(LT);
00063 afterS5(KX); afterS5(S6); afterS6(LT);
00064 afterS6(KX); afterS6(S7); afterS7(LT);
00065
// The third pass ends the loop before the variables are shuffled again.
00066 if (i == 3)
00067 break;
00068
// Re-label the working words for the next pass: c takes b, b takes e, and a and d
// are exchanged with e serving as the temporary. Then step k to the next 32 words.
00069 ++i;
00070 c = b;
00071 b = e;
00072 e = d;
00073 d = a;
00074 a = e;
00075 k += 32;
00076 }
00077 while (true);
00078
// One more key-mixing step after the last pass.
00079 afterS7(KX);
00080
00081 m_state[0] = a;
00082 m_state[1] = b;
00083 m_state[2] = e;
00084 m_state[3] = d;
00085
// XMUX(c, x, y) evaluates to x when the low bit of c is clear and to x ^ y when it is set.
// The macro stays defined after this function; the STEP macro later in this file expands it too.
00086 #define XMUX(c, x, y) (x ^ (y & (0 - (c & 1))))
// Final adjustment of the two register words: word 11 is incremented by the XMUX of
// words 10, 1 and 8, and word 10 is multiplied by 0x54655307 and rotated left by 7.
00087 m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
00088 m_state[10] = rotlFixed(m_state[10] * 0x54655307, 7);
00089 }
00090
// Lookup table of 512 word32 entries, given C linkage.
//
// Entries 0..255 are indexed by the MUL_A macro and by the
// [si + index*4] loads in the SSE2_STEP assembly; entries 256..511 are indexed
// by the DIV_A macro and by the [si + 1024 + index*4] loads.
//
// The first half comes in two variants chosen by the preprocessor. When the
// x86/x32/x64 assembly is enabled, MUL_A rotates its argument left by 8 before
// the lookup, whereas the portable form shifts left by 8. The rotating variant's
// entries equal the shifting variant's entries XOR their own index (for example
// 0xE19FCF12 versus 0xE19FCF13 at index 1), which cancels the byte that the
// rotation brings into the low position. The second half is shared.
00091 extern "C" {
00092 word32 s_sosemanukMulTables[512] = {
00093 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
00094 0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836,
00095 0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E,
00096 0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6,
00097 0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE,
00098 0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF,
00099 0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7,
00100 0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F,
00101 0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67,
00102 0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D,
00103 0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5,
00104 0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D,
00105 0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855,
00106 0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04,
00107 0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C,
00108 0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794,
00109 0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC,
00110 0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9,
00111 0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1,
00112 0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079,
00113 0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31,
00114 0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60,
00115 0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328,
00116 0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0,
00117 0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8,
00118 0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52,
00119 0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A,
00120 0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2,
00121 0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A,
00122 0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB,
00123 0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193,
00124 0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B,
00125 0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03,
00126 0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021,
00127 0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69,
00128 0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1,
00129 0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9,
00130 0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8,
00131 0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0,
00132 0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38,
00133 0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370,
00134 0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A,
00135 0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2,
00136 0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A,
00137 0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042,
00138 0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313,
00139 0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B,
00140 0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83,
00141 0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB,
00142 0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE,
00143 0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6,
00144 0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E,
00145 0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626,
00146 0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577,
00147 0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F,
00148 0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7,
00149 0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF,
00150 0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645,
00151 0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D,
00152 0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5,
00153 0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D,
00154 0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC,
00155 0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984,
00156 0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C,
00157 0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
00158 #else
00159 0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
00160 0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
00161 0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
00162 0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
00163 0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
00164 0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
00165 0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
00166 0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
00167 0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
00168 0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
00169 0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
00170 0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
00171 0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
00172 0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
00173 0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
00174 0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
00175 0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
00176 0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
00177 0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
00178 0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
00179 0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
00180 0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
00181 0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
00182 0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
00183 0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
00184 0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
00185 0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
00186 0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
00187 0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
00188 0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
00189 0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
00190 0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
00191 0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
00192 0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
00193 0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
00194 0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
00195 0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
00196 0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
00197 0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
00198 0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
00199 0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
00200 0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
00201 0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
00202 0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
00203 0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
00204 0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
00205 0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
00206 0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
00207 0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
00208 0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
00209 0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
00210 0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
00211 0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
00212 0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
00213 0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
00214 0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
00215 0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
00216 0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
00217 0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
00218 0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
00219 0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
00220 0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
00221 0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
00222 0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
00223 #endif
00224 0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
00225 0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
00226 0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
00227 0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
00228 0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
00229 0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
00230 0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
00231 0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
00232 0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
00233 0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
00234 0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
00235 0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
00236 0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
00237 0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
00238 0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
00239 0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
00240 0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
00241 0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
00242 0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
00243 0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
00244 0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
00245 0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
00246 0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
00247 0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
00248 0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
00249 0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
00250 0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
00251 0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
00252 0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
00253 0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
00254 0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
00255 0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
00256 0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
00257 0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
00258 0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
00259 0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
00260 0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
00261 0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
00262 0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
00263 0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
00264 0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
00265 0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
00266 0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
00267 0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
00268 0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
00269 0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
00270 0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
00271 0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
00272 0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
00273 0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
00274 0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
00275 0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
00276 0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
00277 0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
00278 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
00279 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
00280 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
00281 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
00282 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
00283 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
00284 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
00285 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
00286 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
00287 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
00288 };
00289 }
00290
00291 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
00292 unsigned int SosemanukPolicy::GetAlignment() const
00293 {
00294 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
00295 #ifdef __INTEL_COMPILER
00296 if (HasSSE2() && !IsP4())
00297 #else
00298 if (HasSSE2())
00299 #endif
00300 return 16;
00301 else
00302 #endif
00303 return GetAlignmentOf<word32>();
00304 }
00305
00306 unsigned int SosemanukPolicy::GetOptimalBlockSize() const
00307 {
00308 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
00309 #ifdef __INTEL_COMPILER
00310 if (HasSSE2() && !IsP4())
00311 #else
00312 if (HasSSE2())
00313 #endif
00314 return 4*BYTES_PER_ITERATION;
00315 else
00316 #endif
00317 return BYTES_PER_ITERATION;
00318 }
00319 #endif
00320
// Prototype, with C linkage, of the externally assembled keystream routine.
// SosemanukPolicy::OperateKeystream calls it (and returns immediately afterwards)
// when CRYPTOPP_X64_MASM_AVAILABLE is defined; a PROC of the same name is emitted
// by the CRYPTOPP_GENERATE_X64_MASM section of that function.
// Argument order: iteration count, input pointer, output pointer, state words.
00321 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00322 extern "C" {
00323 void Sosemanuk_OperateKeystream(size_t iterationCount, const byte *input, byte *output, word32 *state);
00324 }
00325 #endif
00326
// Produces keystream for iterationCount iterations and advances m_state.
//
//   operation       not referenced directly in the visible code
//                   (NOTE(review): presumably consumed by the
//                   CRYPTOPP_KEYSTREAM_OUTPUT_* macros, which are defined elsewhere)
//   output          destination pointer
//   input           source pointer; the assembly tail code tests it for null and
//                   skips the XOR with input in that case
//   iterationCount  number of iterations; the C++ path runs a do/while and so
//                   executes at least once
//
// Three implementations share this body, selected by the preprocessor:
//   1. CRYPTOPP_X64_MASM_AVAILABLE: forward to the external Sosemanuk_OperateKeystream.
//   2. SSE2 assembly (inline for GCC-style and MSVC-style compilers, or emitted as
//      MASM text when CRYPTOPP_GENERATE_X64_MASM is defined), taken when HasSSE2()
//      is true (and, for the Intel compiler, IsP4() is false).
//   3. Portable C++ built from the STEP / OUTPUT4 macros.
00327 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
00328 {
00329 #endif // closes the #ifndef CRYPTOPP_GENERATE_X64_MASM opened near the top of the file
00330
00331 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00332 Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data());
00333 return;
00334 #endif
00335
00336 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
00337 #ifdef CRYPTOPP_GENERATE_X64_MASM
00338 ALIGN 8
00339 Sosemanuk_OperateKeystream PROC FRAME
00340 rex_push_reg rsi
00341 push_reg rdi
00342 alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8)
00343 save_xmm128 xmm6, 02f0h
00344 save_xmm128 xmm7, 0300h
00345 .endprolog
00346 mov rdi, r8
00347 mov rax, r9
00348 #else
00349 #ifdef __INTEL_COMPILER
00350 if (HasSSE2() && !IsP4())
00351 #else
00352 if (HasSSE2())
00353 #endif
00354 {
00355 #ifdef __GNUC__
00356 #if CRYPTOPP_BOOL_X64
// Scratch area for the 64-bit GCC-style build; passed to the asm statement as operand %5.
00357 FixedSizeAlignedSecBlock<byte, 80*4*2+12*4+8*WORD_SZ> workspace;
00358 #endif
00359 __asm__ __volatile__
00360 (
00361 INTEL_NOPREFIX
00362 AS_PUSH_IF86( bx)
00363 #else
// Non-GCC inline assembly: load the arguments into the registers the shared code expects.
00364 word32 *state = m_state;
00365 AS2( mov WORD_REG(ax), state)
00366 AS2( mov WORD_REG(di), output)
00367 AS2( mov WORD_REG(dx), input)
00368 AS2( mov WORD_REG(cx), iterationCount)
00369 #endif
00370 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
00371
// Base of the scratch area: asm operand %5 for 64-bit GCC-style builds, the stack pointer otherwise.
00372 #if defined(__GNUC__) && CRYPTOPP_BOOL_X64
00373 #define SSE2_workspace %5
00374 #else
00375 #define SSE2_workspace WORD_REG(sp)
00376 #endif
00377
// Named slots inside the scratch area. Slot 0 holds the saved stack pointer on 32-bit x86
// (see "mov [esp], AS_REG_6" below); slots 1..7 are pointer-sized variables; a copy of
// state words 0..9 and the u/v staging rows follow.
00378 #define SSE2_output WORD_PTR [SSE2_workspace+1*WORD_SZ]
00379 #define SSE2_input WORD_PTR [SSE2_workspace+2*WORD_SZ]
00380 #define SSE2_wordsLeft WORD_PTR [SSE2_workspace+3*WORD_SZ]
00381 #define SSE2_diEnd WORD_PTR [SSE2_workspace+4*WORD_SZ]
00382 #define SSE2_pMulTables WORD_PTR [SSE2_workspace+5*WORD_SZ]
00383 #define SSE2_state WORD_PTR [SSE2_workspace+6*WORD_SZ]
00384 #define SSE2_wordsLeft2 WORD_PTR [SSE2_workspace+7*WORD_SZ]
00385 #define SSE2_stateCopy SSE2_workspace + 8*WORD_SZ
00386 #define SSE2_uvStart SSE2_stateCopy + 12*4
00387
// 32-bit x86 only: save the stack pointer, align it down to 16 bytes and carve the scratch area out of the stack.
00388 #if (CRYPTOPP_BOOL_X86) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
00389 AS_PUSH_IF86( bp)
00390 AS2( mov AS_REG_6, esp)
00391 AS2( and esp, -16)
00392 AS2( sub esp, 80*4*2+12*4+8*WORD_SZ)
00393 AS2( mov [esp], AS_REG_6)
00394 #endif
// Park the argument registers in their scratch slots.
00395 AS2( mov SSE2_output, WORD_REG(di))
00396 AS2( mov SSE2_input, WORD_REG(dx))
00397 AS2( mov SSE2_state, WORD_REG(ax))
00398 #ifndef _MSC_VER
00399 AS2( mov SSE2_pMulTables, WORD_REG(si))
00400 #endif
// cx = 5 * iterationCount, then si = 4 * cx: twenty per iteration, kept as the "words left" counter.
00401 AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
00402 AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
00403 AS2( mov SSE2_wordsLeft, WORD_REG(si))
// Copy state words 0..9 into the scratch copy (two 16-byte moves plus one 8-byte move).
00404 AS2( movdqa xmm0, [WORD_REG(ax)+0*16])
00405 AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
00406 AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
00407 AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
00408 AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
00409 AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
// AS_REG_6d receives the upper half of that 8-byte load (state word 9); ecx and edx take
// state words 10 and 11; xmm7 becomes all ones.
00410 AS2( psrlq xmm0, 32)
00411 AS2( movd AS_REG_6d, xmm0)
00412 AS2( mov ecx, [WORD_REG(ax)+10*4])
00413 AS2( mov edx, [WORD_REG(ax)+11*4])
00414 AS2( pcmpeqb xmm7, xmm7)
00415
// Addressing helpers: s(i) is word (i mod 10) of the state copy; u(j) and v(j) address the
// staging rows relative to di, with v placed 80*4 bytes after u.
00416 #define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4
00417 #define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
00418 #define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
00419
// The two register words live in ecx and edx and trade roles on alternate steps
// (suffix 0 or 1 selects the assignment for that step).
00420 #define R10 ecx
00421 #define R11 edx
00422 #define R20 edx
00423 #define R21 ecx
00424
00425 #define R20r WORD_REG(dx)
00426 #define R21r WORD_REG(cx)
00427
// One step in assembly: stores a v word and a u word into the staging rows, updates the
// state word at s(i) using two table lookups through si, and updates both register words
// (including the multiply by 0x54655307 and rotate by 7).
// NOTE(review): this appears to mirror the C++ STEP macro further down -- confirm.
00428 #define SSE2_STEP(i, j) \
00429 AS2( mov eax, [s(i+0)])\
00430 AS2( mov [v(i)], eax)\
00431 AS2( rol eax, 8)\
00432 AS2( lea AS_REG_7, [AS_REG_6 + R2##j##r])\
00433 AS2( xor AS_REG_7d, R1##j)\
00434 AS2( mov [u(i)], AS_REG_7d)\
00435 AS2( mov AS_REG_7d, 1)\
00436 AS2( and AS_REG_7d, R2##j)\
00437 AS1( neg AS_REG_7d)\
00438 AS2( and AS_REG_7d, AS_REG_6d)\
00439 AS2( xor AS_REG_6d, eax)\
00440 AS2( movzx eax, al)\
00441 AS2( xor AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\
00442 AS2( mov eax, [s(i+3)])\
00443 AS2( xor AS_REG_7d, [s(i+2)])\
00444 AS2( add R1##j, AS_REG_7d)\
00445 AS2( movzx AS_REG_7d, al)\
00446 AS2( shr eax, 8)\
00447 AS2( xor AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\
00448 AS2( xor AS_REG_6d, eax)\
00449 AS2( imul R2##j, AS_HEX(54655307))\
00450 AS2( rol R2##j, 7)\
00451 AS2( mov [s(i+0)], AS_REG_6d)\
00452
// Label 2: start of one batch. The batch size is the remaining count clamped to 80, and
// SSE2_diEnd marks where the step loop below stops.
00453 ASL(2)
00454 AS2( lea WORD_REG(di), [SSE2_uvStart])
00455 AS2( mov WORD_REG(ax), 80)
00456 AS2( cmp WORD_REG(si), 80)
00457 AS2( cmovg WORD_REG(si), WORD_REG(ax))
00458 AS2( mov SSE2_wordsLeft2, WORD_REG(si))
00459 AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)])
00460 AS2( mov SSE2_diEnd, WORD_REG(si))
// si now becomes the base address of the multiplication tables.
00461 #ifdef _MSC_VER
00462 AS2( lea WORD_REG(si), s_sosemanukMulTables)
00463 #else
00464 AS2( mov WORD_REG(si), SSE2_pMulTables)
00465 #endif
00466
// Label 0: twenty steps per loop iteration; di advances by 5*4 bytes each time around.
00467 ASL(0)
00468 SSE2_STEP(0, 0)
00469 SSE2_STEP(1, 1)
00470 SSE2_STEP(2, 0)
00471 SSE2_STEP(3, 1)
00472 SSE2_STEP(4, 0)
00473 SSE2_STEP(5, 1)
00474 SSE2_STEP(6, 0)
00475 SSE2_STEP(7, 1)
00476 SSE2_STEP(8, 0)
00477 SSE2_STEP(9, 1)
00478 SSE2_STEP(10, 0)
00479 SSE2_STEP(11, 1)
00480 SSE2_STEP(12, 0)
00481 SSE2_STEP(13, 1)
00482 SSE2_STEP(14, 0)
00483 SSE2_STEP(15, 1)
00484 SSE2_STEP(16, 0)
00485 SSE2_STEP(17, 1)
00486 SSE2_STEP(18, 0)
00487 SSE2_STEP(19, 1)
00488
00489 AS2( add WORD_REG(di), 5*4)
00490 AS2( cmp WORD_REG(di), SSE2_diEnd)
00491 ASJ( jne, 0, b)
00492
// Reload the input/output pointers and the batch size, and rewind di to the staging rows.
00493 AS2( mov WORD_REG(ax), SSE2_input)
00494 AS2( mov AS_REG_7, SSE2_output)
00495 AS2( lea WORD_REG(di), [SSE2_uvStart])
00496 AS2( mov WORD_REG(si), SSE2_wordsLeft2)
00497
// Label 1: take four 16-byte u rows and combine them with a fixed sequence of bitwise
// operations, leaving the results in xmm2, xmm3, xmm1 and xmm4.
// NOTE(review): presumably the vector counterpart of the S2(...) call in the C++ OUTPUT4 macro -- confirm.
00498 ASL(1)
00499 AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
00500 AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
00501 AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
00502 AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
00503
00504 AS2( movdqa xmm4, xmm0)
00505 AS2( pand xmm0, xmm2)
00506 AS2( pxor xmm0, xmm3)
00507 AS2( pxor xmm2, xmm1)
00508 AS2( pxor xmm2, xmm0)
00509 AS2( por xmm3, xmm4)
00510 AS2( pxor xmm3, xmm1)
00511 AS2( pxor xmm4, xmm2)
00512 AS2( movdqa xmm1, xmm3)
00513 AS2( por xmm3, xmm4)
00514 AS2( pxor xmm3, xmm0)
00515 AS2( pand xmm0, xmm1)
00516 AS2( pxor xmm4, xmm0)
00517 AS2( pxor xmm1, xmm3)
00518 AS2( pxor xmm1, xmm4)
// xmm7 is all ones, so this complements xmm4.
00519 AS2( pxor xmm4, xmm7)
00520
// XOR in the four v rows (they start 80*4 bytes after the u rows, 80 bytes apart).
00521 AS2( pxor xmm2, [WORD_REG(di)+80*4])
00522 AS2( pxor xmm3, [WORD_REG(di)+80*5])
00523 AS2( pxor xmm1, [WORD_REG(di)+80*6])
00524 AS2( pxor xmm4, [WORD_REG(di)+80*7])
00525
00526
// Fewer than 16 left in this batch: use the four-at-a-time tail code at label 4.
00527 AS2( cmp WORD_REG(si), 16)
00528 ASJ( jl, 4, f)
00529
// Interleave the four result registers (4x4 transposition of their 32-bit lanes) into xmm2, xmm0, xmm6, xmm3.
00530 AS2( movdqa xmm6, xmm2)
00531 AS2( punpckldq xmm2, xmm3)
00532 AS2( movdqa xmm5, xmm1)
00533 AS2( punpckldq xmm1, xmm4)
00534 AS2( movdqa xmm0, xmm2)
00535 AS2( punpcklqdq xmm2, xmm1)
00536 AS2( punpckhqdq xmm0, xmm1)
00537 AS2( punpckhdq xmm6, xmm3)
00538 AS2( punpckhdq xmm5, xmm4)
00539 AS2( movdqa xmm3, xmm6)
00540 AS2( punpcklqdq xmm6, xmm5)
00541 AS2( punpckhqdq xmm3, xmm5)
00542
00543
// Emit the four registers through the shared output macro (defined outside this file).
00544 AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4)
00545
00546
// Next group of rows; loop while the batch counter is non-zero.
00547 AS2( add WORD_REG(di), 4*4)
00548 AS2( sub WORD_REG(si), 16)
00549 ASJ( jnz, 1, b)
00550
00551
// Batch finished: subtract 80 from the overall counter. Zero means done (label 6);
// otherwise save the advanced input/output pointers and start another batch at label 2.
00552 AS2( mov WORD_REG(si), SSE2_wordsLeft)
00553 AS2( sub WORD_REG(si), 80)
00554 ASJ( jz, 6, f)
00555 AS2( mov SSE2_wordsLeft, WORD_REG(si))
00556 AS2( mov SSE2_input, WORD_REG(ax))
00557 AS2( mov SSE2_output, AS_REG_7)
00558 ASJ( jmp, 2, b)
00559
// Label 4: tail path, four 32-bit values per pass. When the input pointer is non-null,
// the next 16 input bytes are XORed into the low lanes first.
00560 ASL(4)
00561 AS2( test WORD_REG(ax), WORD_REG(ax))
00562 ASJ( jz, 5, f)
00563 AS2( movd xmm0, dword ptr [WORD_REG(ax)+0*4])
00564 AS2( pxor xmm2, xmm0)
00565 AS2( movd xmm0, dword ptr [WORD_REG(ax)+1*4])
00566 AS2( pxor xmm3, xmm0)
00567 AS2( movd xmm0, dword ptr [WORD_REG(ax)+2*4])
00568 AS2( pxor xmm1, xmm0)
00569 AS2( movd xmm0, dword ptr [WORD_REG(ax)+3*4])
00570 AS2( pxor xmm4, xmm0)
00571 AS2( add WORD_REG(ax), 16)
// Label 5: store the low lane of each register, then either finish (label 6) or shift each
// register right by four bytes and repeat from label 4.
00572 ASL(5)
00573 AS2( movd dword ptr [AS_REG_7+0*4], xmm2)
00574 AS2( movd dword ptr [AS_REG_7+1*4], xmm3)
00575 AS2( movd dword ptr [AS_REG_7+2*4], xmm1)
00576 AS2( movd dword ptr [AS_REG_7+3*4], xmm4)
00577 AS2( sub WORD_REG(si), 4)
00578 ASJ( jz, 6, f)
00579 AS2( add AS_REG_7, 16)
00580 AS2( psrldq xmm2, 4)
00581 AS2( psrldq xmm3, 4)
00582 AS2( psrldq xmm1, 4)
00583 AS2( psrldq xmm4, 4)
00584 ASJ( jmp, 4, b)
00585
// Label 6: write the scratch copy of state words 0..9 back, followed by ecx and edx as words 10 and 11.
00586 ASL(6)
00587 AS2( mov AS_REG_6, SSE2_state)
00588 AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
00589 AS2( movdqa [AS_REG_6+0*16], xmm0)
00590 AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
00591 AS2( movdqa [AS_REG_6+1*16], xmm0)
00592 AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
00593 AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
00594 AS2( mov [AS_REG_6+10*4], ecx)
00595 AS2( mov [AS_REG_6+11*4], edx)
00596
// 32-bit x86 only: restore the stack pointer saved in slot 0, then the saved bp.
00597 AS_POP_IF86( sp)
00598 AS_POP_IF86( bp)
00599
00600 #ifdef __GNUC__
00601 AS_POP_IF86( bx)
00602 ATT_PREFIX
// No outputs. Inputs are bound to fixed registers (a = state, c = iteration count,
// S = tables, D = output, d = input), plus the workspace pointer on x64.
00603 :
00604 : "a" (m_state.m_ptr), "c" (iterationCount), "S" (s_sosemanukMulTables), "D" (output), "d" (input)
00605 #if CRYPTOPP_BOOL_X64
00606 , "r" (workspace.m_ptr)
00607 : "memory", "cc", "%r9", "%r10", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
00608 #else
00609 : "memory", "cc"
00610 #endif
00611 );
00612 #endif
00613 #ifdef CRYPTOPP_GENERATE_X64_MASM
00614 movdqa xmm6, [rsp + 02f0h]
00615 movdqa xmm7, [rsp + 0300h]
00616 add rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8
00617 pop rdi
00618 pop rsi
00619 ret
00620 Sosemanuk_OperateKeystream ENDP
00621 #else
00622 }
00623 else
00624 #endif
00625 #endif
00626 #ifndef CRYPTOPP_GENERATE_X64_MASM
// Portable C++ path (also the else branch of the run-time SSE2 test above).
00627 {
// MUL_A: table lookup in entries 0..255. The first form rotates x left by 8 in place and
// indexes with its low byte; the second shifts left by 8 and indexes with the top byte.
00628 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
00629 #define MUL_A(x) (x = rotlFixed(x, 8), x ^ s_sosemanukMulTables[byte(x)])
00630 #else
00631 #define MUL_A(x) (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24])
00632 #endif
00633
// DIV_A: shift right by 8 and XOR with an entry from the second half of the table.
00634 #define DIV_A(x) (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)])
00635
// r1/r2 select reg1/reg2 and swap their roles on odd step numbers.
00636 #define r1(i) ((i%2) ? reg2 : reg1)
00637 #define r2(i) ((i%2) ? reg1 : reg2)
00638
// One step: x0..x9 name the ten state words in their current rotation. Produces u and v
// for this step, replaces s<x0>, and updates both register words.
00639 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \
00640 u = (s##x9 + r2(x0)) ^ r1(x0);\
00641 v = s##x0;\
00642 s##x0 = MUL_A(s##x0) ^ DIV_A(s##x3) ^ s##x9;\
00643 r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
00644 r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
00645
// Writes four words, each a u value XORed with a v value, through the keystream output macro.
00646 #define SOSEMANUK_OUTPUT(x) \
00647 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
00648 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
00649 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
00650 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
00651
// Applies S2 to u0..u4 (macro defined outside this file), then emits 4*4 bytes.
00652 #define OUTPUT4 \
00653 S2(0, u0, u1, u2, u3, u4);\
00654 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
00655
// Work on local copies of the twelve state words.
00656 word32 s0 = m_state[0];
00657 word32 s1 = m_state[1];
00658 word32 s2 = m_state[2];
00659 word32 s3 = m_state[3];
00660 word32 s4 = m_state[4];
00661 word32 s5 = m_state[5];
00662 word32 s6 = m_state[6];
00663 word32 s7 = m_state[7];
00664 word32 s8 = m_state[8];
00665 word32 s9 = m_state[9];
00666 word32 reg1 = m_state[10];
00667 word32 reg2 = m_state[11];
00668 word32 u0, u1, u2, u3, u4, v0, v1, v2, v3;
00669
// Each loop iteration performs twenty steps, emitting output after every fourth step.
// The argument lists rotate so that after twenty steps the state words are back in
// their original positions.
00670 do
00671 {
00672 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
00673 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
00674 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
00675 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
00676 OUTPUT4
00677 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
00678 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
00679 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
00680 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
00681 OUTPUT4
00682 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
00683 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
00684 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
00685 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
00686 OUTPUT4
00687 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
00688 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
00689 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
00690 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
00691 OUTPUT4
00692 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
00693 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
00694 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
00695 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
00696 OUTPUT4
00697 }
// Pre-decrement test: the body has already run once before the count is checked.
00698 while (--iterationCount);
00699
// Commit the updated words back to m_state.
00700 m_state[0] = s0;
00701 m_state[1] = s1;
00702 m_state[2] = s2;
00703 m_state[3] = s3;
00704 m_state[4] = s4;
00705 m_state[5] = s5;
00706 m_state[6] = s6;
00707 m_state[7] = s7;
00708 m_state[8] = s8;
00709 m_state[9] = s9;
00710 m_state[10] = reg1;
00711 m_state[11] = reg2;
00712 }
00713 }
00714
00715 NAMESPACE_END
00716
00717 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM