- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 19.2k
PERF: fix performance regression from #62542 #62623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
be21b2e
              fc10a5f
              ab2fab8
              7e8033d
              5219386
              4ff07e3
              c7fc292
              4c8d770
              35f075a
              448f944
              cf0a26d
              2e5a47c
              ca32c01
              46c9883
              69c35ee
              40983dd
              00be2c2
              06297b6
              4f6c9a8
              832d99e
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -1834,6 +1834,34 @@ int uint64_conflict(uint_state *self) { | |
| return self->seen_uint && (self->seen_sint || self->seen_null); | ||
| } | ||
|  | ||
| /** | ||
| * @brief Validates that a string contains only numeric digits. | ||
| * | ||
| * This function is used after an integer overflow, | ||
| * where it checks the rest of the string for a non-numeric character. | ||
| * | ||
| * Pure integer overflows during CSV parsing are converted to PyLongObjects; | ||
| * if any invalid character is found instead, integer parsing is skipped | ||
| * and other conversion methods are tried. | ||
| * | ||
| * @param p_item Pointer to the string to validate for numeric format | ||
| * | ||
| * @return Integer 0 if the remainder of the string contains only digits, | ||
| * otherwise returns the error code for [ERROR_INVALID_CHARS]. | ||
| */ | ||
| static inline int check_for_invalid_char(const char *p_item) { | ||
| while (*p_item != '\0' && isdigit_ascii(*p_item)) { | ||
| p_item++; | ||
| } | ||
|  | ||
| // check if reached the end of string after consuming all digits | ||
| if (*p_item != '\0') { | ||
| return ERROR_INVALID_CHARS; | ||
| } | ||
|  | ||
| return 0; | ||
| } | ||
|  | ||
| int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | ||
| int *error, char tsep) { | ||
| const char *p = p_item; | ||
|  | @@ -1879,6 +1907,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
| d = *++p; | ||
| } else { | ||
| *error = ERROR_OVERFLOW; | ||
| int status = check_for_invalid_char(p); | ||
|          | ||
| if (status != 0) { | ||
| *error = status; | ||
| } | ||
| return 0; | ||
| } | ||
| } | ||
|  | @@ -1890,6 +1922,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
| d = *++p; | ||
| } else { | ||
| *error = ERROR_OVERFLOW; | ||
| int status = check_for_invalid_char(p); | ||
| if (status != 0) { | ||
| *error = status; | ||
| } | ||
| return 0; | ||
| } | ||
| } | ||
|  | @@ -1917,6 +1953,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
|  | ||
| } else { | ||
| *error = ERROR_OVERFLOW; | ||
| int status = check_for_invalid_char(p); | ||
| if (status != 0) { | ||
| *error = status; | ||
| } | ||
| return 0; | ||
| } | ||
| } | ||
|  | @@ -1929,6 +1969,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, | |
|  | ||
| } else { | ||
| *error = ERROR_OVERFLOW; | ||
| int status = check_for_invalid_char(p); | ||
| if (status != 0) { | ||
| *error = status; | ||
| } | ||
| return 0; | ||
| } | ||
| } | ||
|  | @@ -1997,6 +2041,10 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, | |
|  | ||
| } else { | ||
| *error = ERROR_OVERFLOW; | ||
| int status = check_for_invalid_char(p); | ||
| if (status != 0) { | ||
| *error = status; | ||
| } | ||
| return 0; | ||
| } | ||
| } | ||
|  | @@ -2009,6 +2057,10 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, | |
|  | ||
| } else { | ||
| *error = ERROR_OVERFLOW; | ||
| int status = check_for_invalid_char(p); | ||
| if (status != 0) { | ||
| *error = status; | ||
| } | ||
| return 0; | ||
| } | ||
| } | ||
|  | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add the length of the string as an argument? I realize this is a static function, but it's still best to guard against buffer overruns in case of a future refactor.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This information is not available in any of the parent functions, so I would have to call
`strlen` to use it. I don't see much value in it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's about minimizing the risk during a refactor. C is not an inherently safe language, so you need to be somewhat paranoid when writing functions.
You are correct in that, at face value, calling
`strlen` is pretty... well, dumb. But it's a sign that a refactor can happen in another PR to better keep track of the length of a string while processing it.